markdown_syntax/
parse.rs

1//! Markdown source to AST. The entry points are the free [`parse`] function
2//! (maximal default dialect) and the [`SyntaxOptions::parse`] /
3//! [`SyntaxOptions::parse_strict`] methods. Parsing is tolerant: problems are
4//! collected as [`Diagnostic`]s rather than aborting.
5
6use alloc::{borrow::Cow, string::String, vec, vec::Vec};
7
8use crate::{
9    ast::*,
10    diagnostic::{Diagnostic, DiagnosticCode, DiagnosticSeverity},
11    entities::named_character_reference,
12    options::{SyntaxConfigError, SyntaxOptions},
13    span::Span,
14    validate::is_directive_name,
15};
16
/// The result of a tolerant parse: the document plus any diagnostics gathered
/// along the way (empty on a clean parse).
///
/// Returned by the free [`parse`] function and by [`SyntaxOptions::parse`].
/// [`SyntaxOptions::parse_strict`] returns it only when no error-severity
/// diagnostic was collected.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseOutput {
    /// The parsed document tree.
    pub document: Document,
    /// Diagnostics collected during parsing, in the order they were recorded.
    pub diagnostics: Vec<Diagnostic>,
}
26
/// The error returned by [`SyntaxOptions::parse_strict`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseStrictError {
    /// The options themselves were contradictory. Option validation runs
    /// before any parsing, so no document was produced.
    Config(SyntaxConfigError),
    /// An error-severity diagnostic was promoted to a hard failure. Carries a
    /// copy of the first error-severity diagnostic in collection order.
    Diagnostic(Diagnostic),
}
35
/// The parsed resource part of a link: a destination and an optional title,
/// each paired with the syntactic form it was written in.
///
/// NOTE(review): the code that builds and consumes this type is outside the
/// visible part of the file; the field notes below are inferred from names
/// and types only — confirm against the link parser.
#[derive(Clone, Debug, Eq, PartialEq)]
struct ParsedLinkResource {
    /// The link destination text (presumably already unescaped — confirm).
    destination: String,
    /// Which destination syntax was used.
    destination_kind: LinkDestinationKind,
    /// The link title, if one was written.
    title: Option<String>,
    /// Which title delimiter syntax was used; presumably `Some` exactly when
    /// `title` is `Some` — confirm.
    title_kind: Option<LinkTitleKind>,
}
43
// NOTE(review): neither limit is used in the visible part of this file; the
// descriptions below are inferred from the names — confirm at the use sites.

/// Upper bound, in characters, on a reference label. Presumably mirrors
/// CommonMark's limit of 999 characters inside a link label's brackets.
const REFERENCE_LABEL_MAX_CHARS: usize = 999;
/// Upper bound, in bytes, on a wikilink (exactly which part is measured is
/// not visible here).
const WIKILINK_MAX_BYTES: usize = 999;
46
/// One physical line of the text being parsed, as produced by `collect_lines`.
///
/// `start`, `end` and `end_with_eol` are byte offsets that already include the
/// `base_offset` the lines were collected with, so they can be used directly
/// to build spans.
#[derive(Clone, Copy, Debug)]
struct Line<'a> {
    /// The line's content, without its line ending.
    text: &'a str,
    /// The line ending that followed `text`: `"\n"`, `"\r"`, `"\r\n"`, or `""`
    /// for a final line with no terminator.
    eol: &'a str,
    /// Offset of the first byte of `text`.
    start: usize,
    /// Offset one past the last byte of `text` (the line ending is excluded).
    end: usize,
    /// Offset one past the line ending; equal to `end` when `eol` is empty.
    end_with_eol: usize,
    /// True when this line reached the current container as a *lazy continuation*
    /// — a line with no container marker that nonetheless continues an open
    /// paragraph (CommonMark §5.2 laziness). Block constructs that must not be
    /// started by a lazy line (e.g. a setext underline) consult this flag.
    ///
    /// `collect_lines` always sets this to `false`; whichever container parser
    /// re-splits its content is presumably responsible for setting it — confirm.
    lazy: bool,
}
60
/// Description of a list-item marker recognised at the start of a line.
///
/// NOTE(review): the function that fills this in (`list_marker_info`) is not
/// in the visible part of the file; the field notes below are inferred from
/// names and types — confirm against it.
#[derive(Clone, Copy, Debug)]
struct ListMarkerInfo<'a> {
    /// Whether the marker belongs to an ordered list.
    ordered: bool,
    /// The item number of an ordered marker; presumably `None` for bullets.
    start: Option<u64>,
    /// The delimiter style of the marker.
    delimiter: ListDelimiter,
    /// Indentation before the marker (presumably in columns — confirm).
    indent: usize,
    /// Length of the marker itself (presumably in bytes — confirm).
    marker_len: usize,
    /// Indentation at which the item's content starts.
    content_indent: usize,
    /// The text following the marker on the same line.
    content: &'a str,
}
71
/// A description-list details marker found on a line.
///
/// NOTE(review): the producer is outside the visible part of the file; field
/// meanings are inferred from names — confirm.
#[derive(Clone, Copy, Debug)]
struct DescriptionMarker<'a> {
    /// Offset at which `content` begins (presumably relative to the start of
    /// the line — confirm).
    content_offset: usize,
    /// The text following the marker.
    content: &'a str,
}
77
/// A candidate description-list term collected ahead of its details.
///
/// NOTE(review): the description-list parser is outside the visible part of
/// the file; field meanings are inferred from names and types — confirm.
#[derive(Clone, Debug)]
struct DescriptionTerm {
    /// Index (presumably into the line slice) of the line holding the first
    /// details marker for this term.
    marker_index: usize,
    /// Index just past the term's last line (presumably exclusive — confirm).
    term_end: usize,
    /// Whether a blank line separated the term from its details marker.
    blank_after_term: bool,
    /// The term's source text.
    source: String,
    /// Offset of `source` within the document, used for spans.
    source_offset: usize,
}
86
/// How an HTML block's extent is determined once its start has been recognised.
///
/// NOTE(review): the HTML block parser is outside the visible part of the
/// file; the variant notes below are inferred from names and from
/// CommonMark's HTML block start/end conditions — confirm.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum HtmlBlockKind {
    /// Presumably a raw-text element such as `<script>`, `<pre>` or `<style>`.
    RawTag,
    /// Presumably one of the block-level tag names.
    BlockTag,
    /// The block runs until a line containing the given terminator string.
    Until(&'static str),
    /// The block runs until the next blank line.
    UntilBlank,
}
94
95/// Parse `input` under the maximal default dialect ([`SyntaxOptions::default`]).
96/// Infallible and tolerant; sugar for `SyntaxOptions::default().parse(input)`.
97pub fn parse(input: &str) -> ParseOutput {
98    SyntaxOptions::default().parse(input)
99}
100
101impl SyntaxOptions {
102    /// Parse `input` under these options. Infallible and tolerant: a config
103    /// conflict (reachable only by hand-building contradictory `Constructs`) is
104    /// surfaced as an error diagnostic rather than a hard error. Call
105    /// [`SyntaxOptions::validate`] first for fail-fast config checking.
106    pub fn parse(&self, input: &str) -> ParseOutput {
107        match parse_checked(input, self) {
108            Ok(output) => output,
109            Err(error) => ParseOutput {
110                document: Document::default(),
111                diagnostics: vec![Diagnostic::new(
112                    DiagnosticSeverity::Error,
113                    DiagnosticCode::StrictParse,
114                    Span::new(0, input.len()),
115                    error.message(),
116                )],
117            },
118        }
119    }
120
121    /// Parse `input`, promoting a config conflict or any error-severity
122    /// diagnostic to a hard [`ParseStrictError`].
123    pub fn parse_strict(&self, input: &str) -> Result<ParseOutput, ParseStrictError> {
124        let output = parse_checked(input, self).map_err(ParseStrictError::Config)?;
125        if let Some(diagnostic) = output
126            .diagnostics
127            .iter()
128            .find(|diagnostic| diagnostic.severity == DiagnosticSeverity::Error)
129        {
130            return Err(ParseStrictError::Diagnostic(diagnostic.clone()));
131        }
132        Ok(output)
133    }
134}
135
136fn parse_checked(input: &str, options: &SyntaxOptions) -> Result<ParseOutput, SyntaxConfigError> {
137    options.validate()?;
138    // CommonMark treats a leading UTF-8 BOM (U+FEFF) as document-start noise, not
139    // content. Strip a single leading BOM; an interior BOM is left untouched.
140    let input = input.strip_prefix('\u{feff}').unwrap_or(input);
141    // CommonMark replaces U+0000 with U+FFFD during input preprocessing. Only
142    // allocate when a NUL is actually present; otherwise borrow the original.
143    let input: Cow<'_, str> = if input.contains('\u{0}') {
144        Cow::Owned(input.replace('\u{0}', "\u{fffd}"))
145    } else {
146        Cow::Borrowed(input)
147    };
148    let input = input.as_ref();
149    let mut diagnostics = Vec::new();
150    let definitions = collect_definitions(input, options);
151    let children = parse_blocks(input, 0, true, options, &definitions, &mut diagnostics);
152
153    Ok(ParseOutput {
154        document: Document {
155            meta: NodeMeta::new(Some(Span::new(0, input.len()))),
156            children,
157        },
158        diagnostics,
159    })
160}
161
162fn parse_blocks(
163    input: &str,
164    base_offset: usize,
165    allow_frontmatter: bool,
166    options: &SyntaxOptions,
167    definitions: &[String],
168    diagnostics: &mut Vec<Diagnostic>,
169) -> Vec<Block> {
170    let lines = collect_lines(input, base_offset);
171    parse_blocks_from_lines(&lines, allow_frontmatter, options, definitions, diagnostics)
172}
173
174fn parse_blocks_from_lines(
175    lines: &[Line<'_>],
176    allow_frontmatter: bool,
177    options: &SyntaxOptions,
178    definitions: &[String],
179    diagnostics: &mut Vec<Diagnostic>,
180) -> Vec<Block> {
181    let mut blocks = Vec::new();
182    let mut index = 0;
183
184    while index < lines.len() {
185        let line = lines[index];
186        if line.text.trim().is_empty() {
187            index += 1;
188            continue;
189        }
190        let after_definition_unbroken = index > 0
191            && !lines[index - 1].text.trim().is_empty()
192            && matches!(blocks.last(), Some(Block::Definition(_)));
193
194        if allow_frontmatter && index == 0 {
195            if let Some((block, next)) = parse_frontmatter(lines, index, options) {
196                blocks.push(block);
197                index = next;
198                continue;
199            }
200        }
201
202        if let Some((block, next)) =
203            parse_container_directive(lines, index, options, definitions, diagnostics)
204        {
205            blocks.push(block);
206            index = next;
207            continue;
208        }
209
210        if let Some((block, next)) = parse_math_block(lines, index, options) {
211            blocks.push(block);
212            index = next;
213            continue;
214        }
215
216        if let Some((block, next)) = parse_fenced_code(lines, index, options) {
217            blocks.push(block);
218            index = next;
219            continue;
220        }
221
222        if let Some((block, next)) =
223            parse_block_quote(lines, index, options, definitions, diagnostics)
224        {
225            blocks.push(block);
226            index = next;
227            continue;
228        }
229
230        if let Some(block) = parse_atx_heading(line, options, definitions) {
231            blocks.push(block);
232            index += 1;
233            continue;
234        }
235
236        if let Some(block) = parse_thematic_break(line) {
237            blocks.push(block);
238            index += 1;
239            continue;
240        }
241
242        if let Some((block, next)) = parse_list(lines, index, options, definitions, diagnostics) {
243            blocks.push(block);
244            index = next;
245            continue;
246        }
247
248        if let Some((block, next)) =
249            parse_footnote_definition(lines, index, options, definitions, diagnostics)
250        {
251            blocks.push(block);
252            index = next;
253            continue;
254        }
255
256        if let Some((block, next)) =
257            parse_definition(lines, index, options, after_definition_unbroken)
258        {
259            blocks.push(block);
260            index = next;
261            continue;
262        }
263
264        if let Some(block) = parse_leaf_directive(line, options, definitions, diagnostics) {
265            blocks.push(block);
266            index += 1;
267            continue;
268        }
269
270        if let Some((block, next)) = parse_html_block(lines, index, options) {
271            blocks.push(block);
272            index = next;
273            continue;
274        }
275
276        if let Some((block, next)) = parse_mdx_flow(lines, index, options, diagnostics) {
277            blocks.push(block);
278            index = next;
279            continue;
280        }
281
282        if !after_definition_unbroken {
283            if let Some((block, next)) = parse_indented_code(lines, index, options) {
284                blocks.push(block);
285                index = next;
286                continue;
287            }
288        }
289
290        if let Some((block, next)) = parse_table(lines, index, options, definitions, diagnostics) {
291            blocks.push(block);
292            index = next;
293            continue;
294        }
295
296        if let Some((block, next)) = parse_setext_heading(lines, index, options, definitions) {
297            blocks.push(block);
298            index = next;
299            continue;
300        }
301
302        if let Some((block, next)) =
303            parse_description_list(lines, index, options, definitions, diagnostics)
304        {
305            blocks.push(block);
306            index = next;
307            continue;
308        }
309
310        let (block, next) = parse_paragraph(lines, index, options, definitions, diagnostics);
311        blocks.push(block);
312        index = next;
313    }
314
315    blocks
316}
317
318fn collect_lines(input: &str, base_offset: usize) -> Vec<Line<'_>> {
319    let bytes = input.as_bytes();
320    let mut lines = Vec::new();
321    let mut start = 0;
322    let mut index = 0;
323
324    while index < bytes.len() {
325        match bytes[index] {
326            b'\n' => {
327                let end = index;
328                lines.push(Line {
329                    text: &input[start..end],
330                    eol: &input[index..index + 1],
331                    start: base_offset + start,
332                    end: base_offset + end,
333                    end_with_eol: base_offset + index + 1,
334                    lazy: false,
335                });
336                index += 1;
337                start = index;
338            }
339            b'\r' => {
340                let end = index;
341                let eol_end = if index + 1 < bytes.len() && bytes[index + 1] == b'\n' {
342                    index + 2
343                } else {
344                    index + 1
345                };
346                lines.push(Line {
347                    text: &input[start..end],
348                    eol: &input[index..eol_end],
349                    start: base_offset + start,
350                    end: base_offset + end,
351                    end_with_eol: base_offset + eol_end,
352                    lazy: false,
353                });
354                index = eol_end;
355                start = index;
356            }
357            _ => index += 1,
358        }
359    }
360
361    if start < bytes.len() || input.is_empty() {
362        lines.push(Line {
363            text: &input[start..],
364            eol: "",
365            start: base_offset + start,
366            end: base_offset + bytes.len(),
367            end_with_eol: base_offset + bytes.len(),
368            lazy: false,
369        });
370    }
371
372    lines
373}
374
375fn collect_definitions(input: &str, options: &SyntaxOptions) -> Vec<String> {
376    let mut diagnostics = Vec::new();
377    let blocks = parse_blocks(input, 0, true, options, &[], &mut diagnostics);
378    let mut definitions = Vec::new();
379    collect_definition_refs_from_blocks(&blocks, &mut definitions);
380    definitions
381}
382
383fn collect_definition_refs_from_blocks(blocks: &[Block], definitions: &mut Vec<String>) {
384    for block in blocks {
385        match block {
386            Block::Definition(definition) => {
387                if definitions
388                    .iter()
389                    .all(|identifier| identifier != &definition.identifier)
390                {
391                    definitions.push(definition.identifier.clone());
392                }
393            }
394            Block::BlockQuote(node) => {
395                collect_definition_refs_from_blocks(&node.children, definitions);
396            }
397            Block::Alert(node) => {
398                collect_definition_refs_from_blocks(&node.children, definitions);
399            }
400            Block::List(node) => {
401                for item in &node.children {
402                    collect_definition_refs_from_blocks(&item.children, definitions);
403                }
404            }
405            Block::DescriptionList(node) => {
406                for item in &node.children {
407                    for details in &item.details {
408                        collect_definition_refs_from_blocks(&details.children, definitions);
409                    }
410                }
411            }
412            Block::FootnoteDefinition(node) => {
413                collect_definition_refs_from_blocks(&node.children, definitions);
414            }
415            Block::ContainerDirective(node) => {
416                collect_definition_refs_from_blocks(&node.children, definitions);
417            }
418            _ => {}
419        }
420    }
421}
422
423fn parse_frontmatter(
424    lines: &[Line<'_>],
425    index: usize,
426    options: &SyntaxOptions,
427) -> Option<(Block, usize)> {
428    if !options.constructs.frontmatter {
429        return None;
430    }
431    let kind = frontmatter_fence_kind(lines[index].text)?;
432
433    let mut value = String::new();
434    let mut cursor = index + 1;
435    while cursor < lines.len() {
436        if frontmatter_fence_kind(lines[cursor].text) == Some(kind) {
437            let span = Span::new(lines[index].start, lines[cursor].end_with_eol);
438            return Some((
439                Block::Frontmatter(Frontmatter {
440                    meta: NodeMeta::new(Some(span)),
441                    kind,
442                    value,
443                }),
444                cursor + 1,
445            ));
446        }
447        push_line(&mut value, lines[cursor].text);
448        cursor += 1;
449    }
450
451    None
452}
453
454fn frontmatter_fence_kind(line: &str) -> Option<FrontmatterKind> {
455    match line.trim_end_matches([' ', '\t']) {
456        "---" => Some(FrontmatterKind::Yaml),
457        "+++" => Some(FrontmatterKind::Toml),
458        _ => None,
459    }
460}
461
462fn parse_container_directive(
463    lines: &[Line<'_>],
464    index: usize,
465    options: &SyntaxOptions,
466    definitions: &[String],
467    diagnostics: &mut Vec<Diagnostic>,
468) -> Option<(Block, usize)> {
469    if !options.constructs.directive_container {
470        return None;
471    }
472    let trimmed = trim_up_to_three_spaces(lines[index].text)?;
473    let Some((fence_len, opener_rest)) = directive_container_opener_prefix(trimmed) else {
474        return None;
475    };
476    let opener_base = lines[index].start + (lines[index].text.len() - trimmed.len()) + fence_len;
477
478    let Some((name, label_source, attributes, _consumed)) = parse_directive_opener(opener_rest)
479    else {
480        diagnostics.push(Diagnostic::new(
481            DiagnosticSeverity::Error,
482            DiagnosticCode::InvalidDirectiveName,
483            Span::new(lines[index].start, lines[index].end),
484            "container directive must have a valid name",
485        ));
486        return None;
487    };
488    let label_base = opener_base + name.len() + 1;
489
490    let mut content = String::new();
491    let mut cursor = index + 1;
492    let mut nested_fences = Vec::new();
493    while cursor < lines.len() {
494        let line = lines[cursor].text;
495        let trimmed = trim_up_to_three_spaces(line);
496        if let Some(trimmed) = trimmed {
497            if let Some(nested_len) = nested_fences.last().copied() {
498                if directive_container_closing_fence(trimmed, nested_len).is_some() {
499                    nested_fences.pop();
500                    push_line(&mut content, line);
501                    cursor += 1;
502                    continue;
503                }
504            } else if directive_container_closing_fence(trimmed, fence_len).is_some() {
505                let label = label_source
506                    .map(|source| {
507                        parse_inlines(source, label_base, options, definitions, diagnostics)
508                    })
509                    .unwrap_or_default();
510                let children = parse_blocks(
511                    &content,
512                    lines[index + 1].start,
513                    false,
514                    options,
515                    definitions,
516                    diagnostics,
517                );
518                return Some((
519                    Block::ContainerDirective(ContainerDirective {
520                        meta: NodeMeta::new(Some(Span::new(
521                            lines[index].start,
522                            lines[cursor].end_with_eol,
523                        ))),
524                        name,
525                        label,
526                        attributes,
527                        children,
528                    }),
529                    cursor + 1,
530                ));
531            }
532
533            if let Some((nested_len, nested_rest)) = directive_container_opener_prefix(trimmed) {
534                if parse_directive_opener(nested_rest).is_some() {
535                    nested_fences.push(nested_len);
536                }
537            }
538        }
539
540        push_line(&mut content, line);
541        cursor += 1;
542    }
543
544    diagnostics.push(Diagnostic::new(
545        DiagnosticSeverity::Error,
546        DiagnosticCode::UnclosedDirectiveContainer,
547        Span::new(lines[index].start, lines[index].end),
548        "container directive is missing a closing fence",
549    ));
550    Some((
551        Block::ContainerDirective(ContainerDirective {
552            meta: NodeMeta::new(Some(Span::new(
553                lines[index].start,
554                lines.last()?.end_with_eol,
555            ))),
556            name,
557            label: label_source
558                .map(|source| parse_inlines(source, label_base, options, definitions, diagnostics))
559                .unwrap_or_default(),
560            attributes,
561            children: parse_blocks(
562                &content,
563                lines
564                    .get(index + 1)
565                    .map(|line| line.start)
566                    .unwrap_or(lines[index].end),
567                false,
568                options,
569                definitions,
570                diagnostics,
571            ),
572        }),
573        lines.len(),
574    ))
575}
576
/// Splits a leading run of `:` off `input`.
///
/// Returns the run length and the remainder when the run is at least three
/// colons long, and `None` otherwise. `input` is expected to be already
/// indent-stripped.
fn directive_container_opener_prefix(input: &str) -> Option<(usize, &str)> {
    let rest = input.trim_start_matches(':');
    let fence_len = input.len() - rest.len();
    (fence_len >= 3).then_some((fence_len, rest))
}
589
/// Tests whether `input` (already indent-stripped) is a closing fence for a
/// container directive opened with `min_len` colons.
///
/// A closing fence is a run of at least `min_len` colons followed by nothing
/// but whitespace. Returns the actual run length on a match.
fn directive_container_closing_fence(input: &str, min_len: usize) -> Option<usize> {
    let tail = input.trim_start_matches(':');
    let fence_len = input.len() - tail.len();
    if fence_len < min_len || !tail.trim().is_empty() {
        return None;
    }
    Some(fence_len)
}
602
603fn parse_math_block(
604    lines: &[Line<'_>],
605    index: usize,
606    options: &SyntaxOptions,
607) -> Option<(Block, usize)> {
608    if !options.constructs.math_block {
609        return None;
610    }
611    // A math-flow opener is the fenced-code analogue: a `>=2` dollar run after
612    // 0–3 columns of indent, optionally followed by an "info"/meta string that
613    // must NOT contain another `$` (`$$ $$` is inline math, not a flow open).
614    // The opening indent is stripped (up to its own width) from each content
615    // line, exactly like a fenced code block.
616    let opener = trim_up_to_three_spaces(lines[index].text)?;
617    let fence_length = math_block_fence_length(opener)?;
618    let opening_indent = leading_indent_columns(lines[index].text);
619
620    let mut value = String::new();
621    let mut content_lines = 0usize;
622    let mut cursor = index + 1;
623    while cursor < lines.len() {
624        if let Some(close_line) = trim_up_to_three_spaces(lines[cursor].text) {
625            if math_block_fence_closes(close_line, fence_length) {
626                return Some((
627                    Block::MathBlock(MathBlock {
628                        meta: NodeMeta::new(Some(Span::new(
629                            lines[index].start,
630                            lines[cursor].end_with_eol,
631                        ))),
632                        value,
633                    }),
634                    cursor + 1,
635                ));
636            }
637        }
638        if content_lines > 0 {
639            // The previous content line's `eol` usually separates lines. This
640            // fallback only covers synthetic child input that lacks an EOL despite
641            // yielding another line.
642            ensure_line_separator(&mut value);
643        }
644        let stripped = strip_leading_indent_columns(lines[cursor].text, opening_indent);
645        value.push_str(&stripped);
646        value.push_str(lines[cursor].eol);
647        content_lines += 1;
648        cursor += 1;
649    }
650
651    // EOF closes the block (an unclosed opener runs to end of document); an
652    // immediate EOF after the opener yields an empty math block.
653    Some((
654        Block::MathBlock(MathBlock {
655            meta: NodeMeta::new(Some(Span::new(
656                lines[index].start,
657                lines.last()?.end_with_eol,
658            ))),
659            value,
660        }),
661        lines.len(),
662    ))
663}
664
/// Returns the length of the leading `$` run when `input` (already
/// indent-stripped) is a valid math-flow opener.
///
/// An opener needs at least two dollars, and whatever follows the run (the
/// info string) must not contain another `$`.
fn math_block_fence_length(input: &str) -> Option<usize> {
    let info = input.trim_start_matches('$');
    let length = input.len() - info.len();
    (length >= 2 && !info.contains('$')).then_some(length)
}
678
/// Tests whether `input` (already indent-stripped) closes a math-flow block
/// whose opener had `length` dollars.
///
/// The line must consist of at least `length` dollars followed only by
/// whitespace.
fn math_block_fence_closes(input: &str, length: usize) -> bool {
    let trailing = input.trim_start_matches('$');
    let count = input.len() - trailing.len();
    count >= length && trailing.trim().is_empty()
}
689
690fn parse_fenced_code(
691    lines: &[Line<'_>],
692    index: usize,
693    options: &SyntaxOptions,
694) -> Option<(Block, usize)> {
695    let line = fence_line(lines[index].text, options)?;
696    let (marker, length) = fence_start(line)?;
697    // CommonMark: up to N columns of indentation (N = the opening fence's
698    // indent, 0–3) are removed from each content line.
699    let opening_indent = leading_indent_columns(lines[index].text);
700    let info = line[length..].trim();
701    if marker == FenceMarker::Backtick && info.contains('`') {
702        return None;
703    }
704    let info = if info.is_empty() {
705        None
706    } else {
707        Some(unescape_string(info))
708    };
709
710    let mut value = String::new();
711    // Join content lines with `\n` while preserving a leading blank line: a
712    // fenced block can open with a blank content line, and `push_line`'s
713    // empty-output proxy cannot tell zero lines from one empty line, so it would
714    // drop that leading blank. Track the count explicitly (as parse_math_block).
715    let mut content_lines = 0usize;
716    let mut cursor = index + 1;
717    while cursor < lines.len() {
718        if let Some(close_line) = fence_line(lines[cursor].text, options) {
719            if fence_close(close_line, marker, length) {
720                return Some((
721                    Block::CodeBlock(CodeBlock {
722                        meta: NodeMeta::new(Some(Span::new(
723                            lines[index].start,
724                            lines[cursor].end_with_eol,
725                        ))),
726                        kind: CodeBlockKind::Fenced { marker, length },
727                        info,
728                        value,
729                    }),
730                    cursor + 1,
731                ));
732            }
733        }
734        if content_lines > 0 {
735            // The previous content line's `eol` usually separates lines. This
736            // fallback only covers synthetic child input that lacks an EOL despite
737            // yielding another line.
738            ensure_line_separator(&mut value);
739        }
740        let stripped = strip_leading_indent_columns(lines[cursor].text, opening_indent);
741        value.push_str(&stripped);
742        value.push_str(lines[cursor].eol);
743        content_lines += 1;
744        cursor += 1;
745    }
746    Some((
747        Block::CodeBlock(CodeBlock {
748            meta: NodeMeta::new(Some(Span::new(
749                lines[index].start,
750                lines.last()?.end_with_eol,
751            ))),
752            kind: CodeBlockKind::Fenced { marker, length },
753            info,
754            value,
755        }),
756        lines.len(),
757    ))
758}
759
760fn fence_line<'a>(line: &'a str, options: &SyntaxOptions) -> Option<&'a str> {
761    if options.constructs.indented_code {
762        trim_up_to_three_spaces(line)
763    } else {
764        Some(trim_ascii_start(line))
765    }
766}
767
768fn container_closed_after_unclosed_fence(
769    lines: &[Line<'_>],
770    cursor: usize,
771    last_content_index: usize,
772    content: &str,
773    options: &SyntaxOptions,
774) -> bool {
775    !lines[last_content_index].eol.is_empty()
776        && (cursor >= lines.len() || lines[cursor].text.trim().is_empty())
777        && content_has_unclosed_fenced_code(content, options)
778}
779
780fn content_has_unclosed_fenced_code(content: &str, options: &SyntaxOptions) -> bool {
781    let lines = collect_lines(content, 0);
782    let mut open_fence = None;
783    for line in lines {
784        let Some(trimmed) = fence_line(line.text, options) else {
785            continue;
786        };
787        if let Some((marker, length, has_nonblank_content)) = open_fence {
788            if fence_close(trimmed, marker, length) {
789                open_fence = None;
790            } else {
791                open_fence = Some((
792                    marker,
793                    length,
794                    has_nonblank_content || !trimmed.trim().is_empty(),
795                ));
796            }
797            continue;
798        }
799        let Some((marker, length)) = fence_start(trimmed) else {
800            continue;
801        };
802        let info = trimmed[length..].trim();
803        if marker != FenceMarker::Backtick || !info.contains('`') {
804            open_fence = Some((marker, length, false));
805        }
806    }
807    open_fence.is_some_and(|(_, _, has_nonblank_content)| !has_nonblank_content)
808}
809
810/// Recursively determines whether the innermost block reachable through this
811/// (already marker-stripped) block-quote content line is an OPEN paragraph —
812/// the only block kind that a following lazy continuation line may extend.
813///
814/// Nested quote markers are stripped one level at a time so that, e.g.,
815/// `> > a` reports that the deepest content `a` is an open paragraph (this is
816/// what lets a lazy line continue a paragraph buried inside several quotes).
817/// Indented code, blank lines, HTML blocks, and every other block start are
818/// reported as NOT-an-open-paragraph.
819fn block_quote_content_paragraph_open(content: &str, options: &SyntaxOptions) -> bool {
820    let Some(trimmed) = trim_up_to_three_spaces(content) else {
821        // >= 4 columns of indentation: indented code, never a paragraph.
822        return false;
823    };
824    if trimmed.is_empty() {
825        return false;
826    }
827    if let Some(rest) = trimmed.strip_prefix('>') {
828        let rest = rest.strip_prefix(' ').unwrap_or(rest);
829        return block_quote_content_paragraph_open(rest, options);
830    }
831    if let Some(marker) = list_marker_info(trimmed) {
832        let first_content = list_marker_first_content(trimmed, marker);
833        return block_quote_content_paragraph_open(&first_content, options);
834    }
835    !lazy_line_starts_block(trimmed, options)
836}
837
838/// Whether a line starts a block for the purpose of LAZY-continuation
839/// suppression. Identical to [`likely_block_start`] except that *every* HTML
840/// block start — including the type-7 "complete tag" form that cannot interrupt
841/// a paragraph with a marker present — blocks lazy continuation. A bare `<a>`
842/// after `> a` must close the quote, not be absorbed as paragraph text.
843fn lazy_line_starts_block(input: &str, options: &SyntaxOptions) -> bool {
844    likely_block_start(input, options)
845        || (options.constructs.html_block && line_starts_html_block(input))
846        // A lazy line that almost opens a fenced code block — any fence-char
847        // run after up to three spaces of indent — ends the paragraph instead
848        // of continuing it (GH-19): `> x\n``\n` closes the quote rather than
849        // joining `` ` `` onto the paragraph.
850        || trim_up_to_three_spaces(input).is_some_and(|t| t.starts_with('`') || t.starts_with('~'))
851}
852
853fn parse_block_quote(
854    lines: &[Line<'_>],
855    index: usize,
856    options: &SyntaxOptions,
857    definitions: &[String],
858    diagnostics: &mut Vec<Diagnostic>,
859) -> Option<(Block, usize)> {
860    if !trim_up_to_three_spaces(lines[index].text)?.starts_with('>') {
861        return None;
862    }
863
864    let mut content = String::new();
865    // Lazy provenance per collected content line, parallel to the `\n`-joined
866    // `content`. Re-split (`collect_lines`) lines map 1:1 to these flags, so the
867    // child parser can suppress lazy-only constructs (e.g. setext underlines).
868    let mut lazy_flags: Vec<bool> = Vec::new();
869    let mut cursor = index;
870    let mut paragraph_open = false;
871    let mut in_table = false;
872    let mut last_content_line: Option<String> = None;
873    let mut content_base_offset = None;
874    while cursor < lines.len() {
875        let raw = lines[cursor].text;
876        let trimmed_opt = trim_up_to_three_spaces(raw);
877        let marked = trimmed_opt.is_some_and(|trimmed| trimmed.starts_with('>'));
878        let quote_rest_owned: String;
879        if let Some(trimmed) = trimmed_opt {
880            if trimmed.is_empty() {
881                break;
882            }
883        }
884        let (line, line_start) = if marked {
885            let trimmed = trimmed_opt.expect("marked implies a trimmed line");
886            let trimmed_start = lines[cursor].start + (raw.len() - trimmed.len());
887            let mut rest_start = 1;
888            let mut rest = &trimmed[rest_start..];
889            if rest.starts_with(' ') {
890                rest_start += 1;
891                rest = &rest[1..];
892            } else if rest.starts_with('\t') {
893                let marker_end_column = leading_indent_columns(raw) + 1;
894                match strip_leading_indent_columns_from(rest, 1, marker_end_column) {
895                    Cow::Borrowed(stripped) => rest = stripped,
896                    Cow::Owned(stripped) => {
897                        quote_rest_owned = stripped;
898                        rest = &quote_rest_owned;
899                    }
900                }
901            }
902            (rest, trimmed_start + rest_start)
903        } else if in_table {
904            // An open GFM table absorbs unmarked rows (lazy table body); a
905            // non-row unmarked line ends the quote.
906            break;
907        } else if paragraph_open && !lazy_line_starts_block(raw, options) {
908            // Lazy paragraph continuation: a marker-less line that continues an
909            // open paragraph (possibly nested). The RAW line is used verbatim —
910            // its indentation (even >= 4 columns) is paragraph text, not code.
911            (raw, lines[cursor].start)
912        } else {
913            break;
914        };
915
916        let mut escaped_lazy = String::new();
917        let line = if !marked
918            && last_content_line.as_deref().is_some_and(|previous| {
919                table_can_start_source(
920                    previous,
921                    line,
922                    options.constructs.indented_code,
923                    options.constructs.spoiler,
924                )
925            }) {
926            escaped_lazy.push_str(line);
927            if let Some(offset) = escaped_lazy.find('-') {
928                escaped_lazy.insert(offset, '\\');
929            }
930            &escaped_lazy
931        } else {
932            line
933        };
934
935        let starts_table = last_content_line.as_deref().is_some_and(|previous| {
936            table_can_start_source(
937                previous,
938                line,
939                options.constructs.indented_code,
940                options.constructs.spoiler,
941            )
942        });
943        if marked && starts_table {
944            paragraph_open = false;
945            in_table = true;
946        } else if marked && in_table && block_quote_table_body_row(line, options) {
947            paragraph_open = false;
948        } else {
949            in_table = false;
950            // Track the innermost open paragraph across nested quote markers so a
951            // following lazy line can reach a paragraph buried in nested quotes.
952            paragraph_open = block_quote_content_paragraph_open(line, options);
953        }
954        last_content_line = Some(line.into());
955        if content_base_offset.is_none() {
956            content_base_offset = Some(line_start);
957        }
958        push_line(&mut content, line);
959        lazy_flags.push(!marked);
960        cursor += 1;
961    }
962
963    let span = Span::new(lines[index].start, lines[cursor - 1].end_with_eol);
964    let child_base_offset = content_base_offset.unwrap_or(lines[index].start);
965    if !lines[cursor - 1].eol.is_empty() && !ends_with_line_ending(&content) {
966        content.push_str(lines[cursor - 1].eol);
967    }
968    if container_closed_after_unclosed_fence(lines, cursor, cursor - 1, &content, options) {
969        content.push('\n');
970    }
971    if let Some(alert) = parse_alert_from_block_quote(
972        &content,
973        child_base_offset,
974        span,
975        options,
976        definitions,
977        diagnostics,
978    ) {
979        return Some((alert, cursor));
980    }
981
982    let mut child_lines = collect_lines(&content, child_base_offset);
983    for (child, &lazy) in child_lines.iter_mut().zip(lazy_flags.iter()) {
984        child.lazy = lazy;
985    }
986    let children = parse_blocks_from_lines(&child_lines, false, options, definitions, diagnostics);
987    Some((
988        Block::BlockQuote(BlockQuote {
989            meta: NodeMeta::new(Some(span)),
990            children,
991        }),
992        cursor,
993    ))
994}
995
996fn parse_alert_from_block_quote(
997    content: &str,
998    base_offset: usize,
999    span: Span,
1000    options: &SyntaxOptions,
1001    definitions: &[String],
1002    diagnostics: &mut Vec<Diagnostic>,
1003) -> Option<Block> {
1004    if !options.constructs.gfm_alert {
1005        return None;
1006    }
1007    let (first_line, rest) = content.split_once('\n').unwrap_or((content, ""));
1008    let (kind, title) = parse_alert_marker(first_line)?;
1009    let rest_base_offset = base_offset + first_line.len() + usize::from(!rest.is_empty());
1010    let children = if rest.is_empty() {
1011        Vec::new()
1012    } else {
1013        parse_blocks(
1014            rest,
1015            rest_base_offset,
1016            false,
1017            options,
1018            definitions,
1019            diagnostics,
1020        )
1021    };
1022    Some(Block::Alert(Alert {
1023        meta: NodeMeta::new(Some(span)),
1024        kind,
1025        title,
1026        children,
1027    }))
1028}
1029
1030fn parse_alert_marker(line: &str) -> Option<(AlertKind, Option<String>)> {
1031    let close = line.find(']')?;
1032    let marker = line.get(0..close + 1)?;
1033    if !marker.starts_with("[!") {
1034        return None;
1035    }
1036    let kind = match &marker[2..close].to_ascii_lowercase()[..] {
1037        "note" => AlertKind::Note,
1038        "tip" => AlertKind::Tip,
1039        "important" => AlertKind::Important,
1040        "warning" => AlertKind::Warning,
1041        "caution" => AlertKind::Caution,
1042        _ => return None,
1043    };
1044    let title = line[close + 1..].trim();
1045    Some((
1046        kind,
1047        if title.is_empty() {
1048            None
1049        } else {
1050            Some(title.into())
1051        },
1052    ))
1053}
1054
1055fn block_quote_table_body_row(line: &str, options: &SyntaxOptions) -> bool {
1056    table_indent_line(line, options.constructs.indented_code).is_some_and(|row| {
1057        !row.trim().is_empty() && contains_unescaped_pipe(row, options.constructs.spoiler)
1058    })
1059}
1060
1061fn parse_list(
1062    lines: &[Line<'_>],
1063    index: usize,
1064    options: &SyntaxOptions,
1065    definitions: &[String],
1066    diagnostics: &mut Vec<Diagnostic>,
1067) -> Option<(Block, usize)> {
1068    let first_marker = list_marker_info(lines[index].text)?;
1069    let mut items = Vec::new();
1070    let mut cursor = index;
1071    let mut tight = true;
1072
1073    while cursor < lines.len() {
1074        // A thematic break (`* * *`, `---`, …) outranks a list marker at the same
1075        // position: it ends the list rather than opening a nested item. Test it
1076        // before accepting the line as a marker (precedence belongs at the call
1077        // site, not inside `list_marker_info`).
1078        if parse_thematic_break(lines[cursor]).is_some() {
1079            break;
1080        }
1081        let Some(marker) = list_marker_info(lines[cursor].text) else {
1082            break;
1083        };
1084        if !same_list_marker(first_marker, marker) {
1085            break;
1086        }
1087
1088        let item_start = cursor;
1089        let mut item_end = cursor;
1090        let mut item_tight = true;
1091        // Byte offsets within `content` at which an item-internal blank line
1092        // sits. After the item's children are parsed, a blank loosens the item
1093        // only when it falls in the GAP between two consecutive top-level
1094        // children (a direct separator); a blank absorbed inside a nested
1095        // container's span does not (per-list tightness).
1096        let mut item_blank_offsets: Vec<usize> = Vec::new();
1097        let mut content = String::new();
1098        // Lazy provenance per collected content line (parallel to the `\n`-joined
1099        // `content`, mapped 1:1 by the re-split `collect_lines`). A line is lazy
1100        // when it reached the item only as a paragraph continuation while
1101        // dedented below the item's content start: it is paragraph text and must
1102        // not begin a new block (e.g. `- d\n    - e` keeps `- e` as the lazy tail
1103        // of `d`'s paragraph, not a sublist — CommonMark "too few spaces").
1104        let mut lazy_flags: Vec<bool> = Vec::new();
1105        let mut open_fence = None;
1106        let first_content = list_marker_first_content(lines[cursor].text, marker);
1107        let mut last_content_line: Option<String> = Some(first_content.as_ref().into());
1108        let mut paragraph_open = list_item_paragraph_stays_open(None, &first_content, options);
1109        // CommonMark §5.2: a list item can begin with at most one blank line.
1110        // When the marker has no content the item starts blank, and the first
1111        // following blank line ends it — later indented content cannot join
1112        // (`-\n\n  foo` → empty item + separate paragraph).
1113        let mut item_started_blank = first_content.trim().is_empty();
1114        push_line(&mut content, &first_content);
1115        lazy_flags.push(false);
1116        update_list_item_fence(&first_content, &mut open_fence);
1117        cursor += 1;
1118
1119        while cursor < lines.len() {
1120            if lines[cursor].text.trim().is_empty() {
1121                // Blank/whitespace lines inside an open fenced code block are
1122                // verbatim code content, not item-ending blanks: keep them.
1123                if open_fence.is_some() {
1124                    let stripped = strip_list_continuation(
1125                        lines[cursor].text,
1126                        marker.content_indent,
1127                        first_marker.indent,
1128                    );
1129                    push_line(&mut content, &stripped);
1130                    lazy_flags.push(false);
1131                    update_list_item_fence(&stripped, &mut open_fence);
1132                    item_end = cursor;
1133                    cursor += 1;
1134                    continue;
1135                }
1136                let next = next_nonblank_line(lines, cursor + 1);
1137                if item_started_blank
1138                    || next >= lines.len()
1139                    || sibling_list_marker_at_line(
1140                        lines[next].text,
1141                        first_marker,
1142                        marker.content_indent,
1143                    )
1144                    || leading_indent_columns(lines[next].text) < marker.content_indent
1145                {
1146                    if next < lines.len()
1147                        && sibling_list_marker_at_line(
1148                            lines[next].text,
1149                            first_marker,
1150                            marker.content_indent,
1151                        )
1152                    {
1153                        item_tight = false;
1154                    }
1155                    cursor = next;
1156                    break;
1157                }
1158                // A blank between item content is recorded; whether it actually
1159                // loosens THIS list is decided structurally after the item's
1160                // children are parsed (a blank buried in a nested sublist must
1161                // not loosen the outer list — CommonMark requires the item to
1162                // *directly* contain the blank-separated blocks). Track the blank
1163                // line's offset within the collected content so the structural
1164                // check can tell a direct-child separator from a nested one.
1165                item_blank_offsets.push(content.len() + usize::from(!content.is_empty()));
1166                paragraph_open = false;
1167                push_line(&mut content, "");
1168                lazy_flags.push(false);
1169                item_end = cursor;
1170                cursor += 1;
1171                continue;
1172            }
1173
1174            item_started_blank = false;
1175
1176            if sibling_list_marker_at_line(lines[cursor].text, first_marker, marker.content_indent)
1177            {
1178                break;
1179            }
1180
1181            // A list marker of a different type/delimiter is a block boundary
1182            // (CommonMark §5.3: changing the marker starts a new list). It is not
1183            // a same-list sibling, so it would otherwise be absorbed as lazy
1184            // paragraph text — break the item instead so a new list can start.
1185            if leading_indent_columns(lines[cursor].text) < marker.content_indent
1186                && !same_list_marker_line(lines[cursor].text, first_marker)
1187                && list_marker_info(lines[cursor].text).is_some()
1188            {
1189                break;
1190            }
1191
1192            if leading_indent_columns(lines[cursor].text) < marker.content_indent {
1193                if likely_block_start(lines[cursor].text, options) || !paragraph_open {
1194                    break;
1195                }
1196            }
1197
1198            // A line dedented below the item's content start only stays in the
1199            // item as a lazy paragraph continuation (it reached here because a
1200            // paragraph was open). Mark it lazy so the re-parse keeps it as
1201            // paragraph text rather than letting a stripped `- e`/`> q`/`# h`
1202            // begin a fresh block inside the item.
1203            let lazy = paragraph_open
1204                && leading_indent_columns(lines[cursor].text) < marker.content_indent;
1205            let stripped = strip_list_continuation(
1206                lines[cursor].text,
1207                marker.content_indent,
1208                first_marker.indent,
1209            );
1210            let starts_table = last_content_line.as_deref().is_some_and(|previous| {
1211                table_can_start_source(
1212                    previous,
1213                    &stripped,
1214                    options.constructs.indented_code,
1215                    options.constructs.spoiler,
1216                )
1217            });
1218            paragraph_open = if starts_table {
1219                false
1220            } else {
1221                list_item_paragraph_stays_open(Some(paragraph_open), &stripped, options)
1222            };
1223            push_line(&mut content, &stripped);
1224            lazy_flags.push(lazy);
1225            update_list_item_fence(&stripped, &mut open_fence);
1226            last_content_line = Some(stripped.into_owned());
1227            item_end = cursor;
1228            cursor += 1;
1229        }
1230
1231        let child_base = lines[item_start].start + marker.content_indent;
1232        if !lines[item_end].eol.is_empty() && !ends_with_line_ending(&content) {
1233            content.push_str(lines[item_end].eol);
1234        }
1235        if container_closed_after_unclosed_fence(lines, cursor, item_end, &content, options) {
1236            content.push('\n');
1237        }
1238        let mut child_lines = collect_lines(&content, child_base);
1239        for (child, &lazy) in child_lines.iter_mut().zip(lazy_flags.iter()) {
1240            child.lazy = lazy;
1241        }
1242        let mut children =
1243            parse_blocks_from_lines(&child_lines, false, options, definitions, diagnostics);
1244        let checked = if options.constructs.gfm_task_list_item {
1245            take_task_marker_from_children(&mut children)
1246        } else {
1247            None
1248        };
1249
1250        if item_tight
1251            && blank_separates_top_level_blocks(&item_blank_offsets, &children, child_base)
1252        {
1253            item_tight = false;
1254        }
1255        tight = tight && item_tight;
1256        items.push(ListItem {
1257            meta: NodeMeta::new(Some(Span::new(
1258                lines[item_start].start,
1259                lines[item_end].end_with_eol,
1260            ))),
1261            checked,
1262            children,
1263        });
1264    }
1265
1266    Some((
1267        Block::List(List {
1268            meta: NodeMeta::new(Some(Span::new(
1269                lines[index].start,
1270                lines[cursor - 1].end_with_eol,
1271            ))),
1272            ordered: first_marker.ordered,
1273            start: first_marker.start,
1274            delimiter: first_marker.delimiter,
1275            tight,
1276            children: items,
1277        }),
1278        cursor,
1279    ))
1280}
1281
1282/// Whether an item-internal blank line directly separates two of the item's own
1283/// top-level block children — which loosens the list. A blank loosens the item
1284/// when some top-level child STARTS after the blank: that child was split off
1285/// from the preceding content by the blank. A blank with no top-level child
1286/// starting after it was either trailing or absorbed into a nested container
1287/// (e.g. a sublist), so it does not loosen the outer list — CommonMark only
1288/// counts blank lines between blocks the item *directly* contains, and per-list
1289/// tightness keeps a sublist's internal blank from propagating outward.
1290///
1291/// Blank offsets and child spans share the `child_base` content origin (both
1292/// were produced from the same stripped item content), so the comparison is in
1293/// one coordinate space.
1294fn blank_separates_top_level_blocks(
1295    blank_offsets: &[usize],
1296    children: &[Block],
1297    child_base: usize,
1298) -> bool {
1299    if blank_offsets.is_empty() || children.len() < 2 {
1300        return false;
1301    }
1302    let Some(&first_blank) = blank_offsets.iter().min() else {
1303        return false;
1304    };
1305    children.iter().any(|child| {
1306        block_span(child).is_some_and(|span| span.start.saturating_sub(child_base) > first_blank)
1307    })
1308}
1309
1310fn block_span(block: &Block) -> Option<Span> {
1311    let meta = match block {
1312        Block::Paragraph(node) => &node.meta,
1313        Block::Heading(node) => &node.meta,
1314        Block::ThematicBreak(node) => &node.meta,
1315        Block::BlockQuote(node) => &node.meta,
1316        Block::Alert(node) => &node.meta,
1317        Block::List(node) => &node.meta,
1318        Block::DescriptionList(node) => &node.meta,
1319        Block::CodeBlock(node) => &node.meta,
1320        Block::HtmlBlock(node) => &node.meta,
1321        Block::Definition(node) => &node.meta,
1322        Block::FootnoteDefinition(node) => &node.meta,
1323        Block::Table(node) => &node.meta,
1324        Block::MathBlock(node) => &node.meta,
1325        Block::Frontmatter(node) => &node.meta,
1326        Block::MdxEsm(node) => &node.meta,
1327        Block::MdxExpression(node) => &node.meta,
1328        Block::MdxJsx(node) => &node.meta,
1329        Block::LeafDirective(node) => &node.meta,
1330        Block::ContainerDirective(node) => &node.meta,
1331    };
1332    meta.span
1333}
1334
1335fn list_item_paragraph_stays_open(
1336    previous_open: Option<bool>,
1337    line: &str,
1338    options: &SyntaxOptions,
1339) -> bool {
1340    if line.trim().is_empty() {
1341        return false;
1342    }
1343    if previous_open == Some(false) {
1344        return false;
1345    }
1346    block_quote_content_paragraph_open(line, options)
1347}
1348
1349fn parse_description_list(
1350    lines: &[Line<'_>],
1351    index: usize,
1352    options: &SyntaxOptions,
1353    definitions: &[String],
1354    diagnostics: &mut Vec<Diagnostic>,
1355) -> Option<(Block, usize)> {
1356    if !options.constructs.description_list || !is_description_term_line(lines[index].text, options)
1357    {
1358        return None;
1359    }
1360
1361    let mut cursor = index;
1362    let mut items = Vec::new();
1363    let mut tight = true;
1364    let mut list_end = lines[index].end_with_eol;
1365
1366    while cursor < lines.len() {
1367        if !is_description_term_line(lines[cursor].text, options) {
1368            break;
1369        }
1370        let Some(term) = description_term(lines, cursor, options) else {
1371            break;
1372        };
1373        let term_line = lines[cursor];
1374        let mut details = Vec::new();
1375        let item_start = term_line.start;
1376        let mut item_end = lines[term.term_end].end_with_eol;
1377        tight = tight && !term.blank_after_term;
1378        cursor = term.marker_index;
1379
1380        loop {
1381            let Some(marker) = description_marker(lines[cursor].text) else {
1382                break;
1383            };
1384            let (detail, next, detail_tight) = parse_description_details(
1385                lines,
1386                cursor,
1387                marker,
1388                options,
1389                definitions,
1390                diagnostics,
1391            )?;
1392            tight = tight && detail_tight;
1393            item_end = detail
1394                .meta
1395                .span
1396                .map(|span| span.end)
1397                .unwrap_or(lines[cursor].end_with_eol);
1398            details.push(detail);
1399            cursor = next;
1400
1401            let next_nonblank = next_nonblank_line(lines, cursor);
1402            if next_nonblank < lines.len()
1403                && description_marker(lines[next_nonblank].text).is_some()
1404            {
1405                if next_nonblank != cursor {
1406                    tight = false;
1407                }
1408                cursor = next_nonblank;
1409                continue;
1410            }
1411            break;
1412        }
1413
1414        if details.is_empty() {
1415            return None;
1416        }
1417        list_end = item_end;
1418        items.push(DescriptionItem {
1419            meta: NodeMeta::new(Some(Span::new(item_start, item_end))),
1420            term: parse_inlines(
1421                &term.source,
1422                term.source_offset,
1423                options,
1424                definitions,
1425                diagnostics,
1426            ),
1427            details,
1428        });
1429
1430        let next_item = next_nonblank_line(lines, cursor);
1431        if next_item >= lines.len() {
1432            cursor = next_item;
1433            break;
1434        }
1435        if description_term(lines, next_item, options).is_some() {
1436            if next_item != cursor {
1437                tight = false;
1438            }
1439            cursor = next_item;
1440            continue;
1441        }
1442        cursor = next_item;
1443        break;
1444    }
1445
1446    (!items.is_empty()).then_some((
1447        Block::DescriptionList(DescriptionList {
1448            meta: NodeMeta::new(Some(Span::new(lines[index].start, list_end))),
1449            tight,
1450            children: items,
1451        }),
1452        cursor,
1453    ))
1454}
1455
1456fn parse_description_details(
1457    lines: &[Line<'_>],
1458    index: usize,
1459    marker: DescriptionMarker<'_>,
1460    options: &SyntaxOptions,
1461    definitions: &[String],
1462    diagnostics: &mut Vec<Diagnostic>,
1463) -> Option<(DescriptionDetails, usize, bool)> {
1464    let mut content = String::new();
1465    push_line(&mut content, marker.content);
1466    let mut cursor = index + 1;
1467    let mut end = lines[index].end_with_eol;
1468    let mut tight = true;
1469    let mut paragraph_open = paragraph_stays_open(marker.content, options);
1470
1471    while cursor < lines.len() {
1472        if lines[cursor].text.trim().is_empty() {
1473            let next = next_nonblank_line(lines, cursor + 1);
1474            // A blank that merely separates this definition from a following
1475            // `:`/`~` marker (another definition of the SAME term) is
1476            // content-separating, so it loosens the list. A blank that ends the
1477            // item — because the next non-blank line begins a new TERM, or the
1478            // document ends — is just an item boundary and must NOT loosen the
1479            // list (such blank-separated term groups stay tight).
1480            if next >= lines.len() || description_term(lines, next, options).is_some() {
1481                cursor = next;
1482                break;
1483            }
1484            if description_marker(lines[next].text).is_some() {
1485                tight = false;
1486                cursor = next;
1487                break;
1488            }
1489            if strip_indent_continuation(lines[next].text).is_none() {
1490                break;
1491            }
1492            push_line(&mut content, "");
1493            paragraph_open = false;
1494            tight = false;
1495            end = lines[cursor].end_with_eol;
1496            cursor += 1;
1497            continue;
1498        }
1499
1500        if description_marker(lines[cursor].text).is_some()
1501            || description_term(lines, cursor, options).is_some()
1502        {
1503            break;
1504        }
1505
1506        let continuation = if let Some(continuation) = strip_indent_continuation(lines[cursor].text)
1507        {
1508            continuation
1509        } else if paragraph_open && !likely_block_start(lines[cursor].text, options) {
1510            trim_ascii_start(lines[cursor].text)
1511        } else {
1512            break;
1513        };
1514        paragraph_open = paragraph_stays_open(continuation, options);
1515        push_line(&mut content, continuation);
1516        end = lines[cursor].end_with_eol;
1517        cursor += 1;
1518    }
1519
1520    if content.trim().is_empty() {
1521        return None;
1522    }
1523
1524    Some((
1525        DescriptionDetails {
1526            meta: NodeMeta::new(Some(Span::new(lines[index].start, end))),
1527            children: parse_blocks(
1528                &content,
1529                lines[index].start + marker.content_offset,
1530                false,
1531                options,
1532                definitions,
1533                diagnostics,
1534            ),
1535        },
1536        cursor,
1537        tight,
1538    ))
1539}
1540
1541fn description_term(
1542    lines: &[Line<'_>],
1543    term_index: usize,
1544    options: &SyntaxOptions,
1545) -> Option<DescriptionTerm> {
1546    if term_index >= lines.len() || !is_description_term_line(lines[term_index].text, options) {
1547        return None;
1548    }
1549    let mut source = String::new();
1550    let mut term_end = term_index;
1551    let mut cursor = term_index;
1552    while cursor < lines.len() && is_description_term_line(lines[cursor].text, options) {
1553        if !source.is_empty() {
1554            source.push('\n');
1555        }
1556        source.push_str(trim_ascii_start(lines[cursor].text).trim_end());
1557        term_end = cursor;
1558        cursor += 1;
1559    }
1560
1561    let mut marker_index = cursor;
1562    let mut blank_after_term = false;
1563    while marker_index < lines.len() && lines[marker_index].text.trim().is_empty() {
1564        blank_after_term = true;
1565        marker_index += 1;
1566    }
1567    (marker_index < lines.len() && description_marker(lines[marker_index].text).is_some()).then(
1568        || DescriptionTerm {
1569            marker_index,
1570            term_end,
1571            blank_after_term,
1572            source,
1573            source_offset: lines[term_index].start + leading_trim_bytes(lines[term_index].text),
1574        },
1575    )
1576}
1577
1578fn is_description_term_line(line: &str, options: &SyntaxOptions) -> bool {
1579    leading_indent_columns(line) <= 3
1580        && !line.trim().is_empty()
1581        && description_marker(line).is_none()
1582        && !likely_block_start(line, options)
1583}
1584
1585fn description_marker(line: &str) -> Option<DescriptionMarker<'_>> {
1586    let (columns, bytes) = leading_indent(line);
1587    if columns > 2 || !matches!(line.as_bytes().get(bytes), Some(b':' | b'~')) {
1588        return None;
1589    }
1590    if line
1591        .as_bytes()
1592        .get(bytes + 1)
1593        .is_some_and(|byte| !matches!(*byte, b' ' | b'\t'))
1594    {
1595        return None;
1596    }
1597    let mut content_offset = bytes + 1;
1598    while line
1599        .as_bytes()
1600        .get(content_offset)
1601        .is_some_and(|byte| matches!(*byte, b' ' | b'\t'))
1602    {
1603        content_offset += 1;
1604    }
1605    Some(DescriptionMarker {
1606        content_offset,
1607        content: &line[content_offset..],
1608    })
1609}
1610
1611/// A paragraph inside an indent-continuation container (footnote/description
1612/// detail) keeps absorbing the next line as long as it is non-blank and does
1613/// not itself begin a new block.
1614fn paragraph_stays_open(line: &str, options: &SyntaxOptions) -> bool {
1615    !line.trim().is_empty() && !likely_block_start(line, options)
1616}
1617
/// Removes exactly one level of continuation indent from `input`: four leading
/// spaces or, failing that, one leading tab. Returns `None` when the line
/// starts with neither.
fn strip_indent_continuation(input: &str) -> Option<&str> {
    match input.strip_prefix("    ") {
        Some(rest) => Some(rest),
        None => input.strip_prefix('\t'),
    }
}
1624
1625fn parse_atx_heading(
1626    line: Line<'_>,
1627    options: &SyntaxOptions,
1628    definitions: &[String],
1629) -> Option<Block> {
1630    let text = trim_up_to_three_spaces(line.text)?;
1631    let depth = text
1632        .as_bytes()
1633        .iter()
1634        .take_while(|byte| **byte == b'#')
1635        .count();
1636    if depth == 0 || depth > 6 {
1637        return None;
1638    }
1639    if text
1640        .as_bytes()
1641        .get(depth)
1642        .is_some_and(|byte| !matches!(*byte, b' ' | b'\t'))
1643        && text.len() != depth
1644    {
1645        return None;
1646    }
1647    let after_opening = &text[depth..];
1648    let content_start_in_text = depth + leading_trim_bytes(after_opening);
1649    let content = trim_closing_hashes(after_opening.trim_start());
1650    let content_start = line.start + (line.text.len() - text.len()) + content_start_in_text;
1651    Some(Block::Heading(Heading {
1652        meta: NodeMeta::new(Some(Span::new(line.start, line.end))),
1653        depth: depth as u8,
1654        kind: HeadingKind::Atx,
1655        children: parse_inlines(
1656            content,
1657            content_start,
1658            options,
1659            definitions,
1660            &mut Vec::new(),
1661        ),
1662    }))
1663}
1664
1665fn parse_thematic_break(line: Line<'_>) -> Option<Block> {
1666    let text = trim_up_to_three_spaces(line.text)?.trim();
1667    let mut marker = None;
1668    let mut count = 0;
1669    for char in text.chars() {
1670        if char == ' ' || char == '\t' {
1671            continue;
1672        }
1673        let current = match char {
1674            '-' => ThematicBreakMarker::Dash,
1675            '*' => ThematicBreakMarker::Asterisk,
1676            '_' => ThematicBreakMarker::Underscore,
1677            _ => return None,
1678        };
1679        if marker.is_some_and(|marker| marker != current) {
1680            return None;
1681        }
1682        marker = Some(current);
1683        count += 1;
1684    }
1685    if count >= 3 {
1686        Some(Block::ThematicBreak(ThematicBreak {
1687            meta: NodeMeta::new(Some(Span::new(line.start, line.end))),
1688            marker: marker?,
1689        }))
1690    } else {
1691        None
1692    }
1693}
1694
/// Attempts to parse a link reference definition (`[label]: destination
/// "title"`) that begins at `lines[index]`.
///
/// On success returns the [`Block::Definition`] together with the index of the
/// first line that was *not* consumed. Returns `None` when the line does not
/// start a definition, when the label never closes with `]:`, when the label
/// normalizes to the empty string, or when no destination can be parsed.
///
/// `allow_subsequent_indent` is forwarded to `trim_definition_start`, which
/// then also accepts a first line indented by exactly four columns.
fn parse_definition(
    lines: &[Line<'_>],
    index: usize,
    options: &SyntaxOptions,
    allow_subsequent_indent: bool,
) -> Option<(Block, usize)> {
    let line = lines[index];
    let text = trim_definition_start(line.text, allow_subsequent_indent)?;
    if !text.starts_with('[') {
        return None;
    }

    // A reference-definition label may span several lines (CommonMark §4.7): the
    // `]:` closing the label can appear on a later line. Accumulate continuation
    // lines until the label closes, stopping at a blank line or end of input (a
    // blank line cannot occur inside a label). The first line's leading indent
    // is already stripped by `trim_definition_start`; continuation lines are
    // appended verbatim, and `normalize_label` collapses the interior newlines and
    // surrounding whitespace when the label is matched.
    let mut accumulated = String::from(text);
    let mut label_end_line = index;
    let close = loop {
        if let Some(close) = find_reference_label_end(&accumulated, 0) {
            if accumulated.as_bytes().get(close + 1) == Some(&b':') {
                break close;
            }
            // A closed label not followed by `:` is not a definition.
            return None;
        }
        let next = label_end_line + 1;
        if next >= lines.len() || lines[next].text.trim().is_empty() {
            return None;
        }
        // The unclosed label behaves like an open paragraph: a continuation line
        // that itself begins a block construct (a setext underline, or a GFM table
        // header/delimiter pair) interrupts it, so the definition fails and the
        // lines are re-parsed as blocks (CommonMark/GFM prefer setext headings,
        // thematic breaks, fenced code, and tables over a label that has not yet
        // closed — e.g. `[\na\n=\n]: b` or `[\na\n:-\n]: b`).
        if likely_block_start(lines[next].text, options)
            || setext_underline_depth(lines[next].text).is_some()
            || table_can_start(lines, next, options)
        {
            return None;
        }
        accumulated.push('\n');
        accumulated.push_str(lines[next].text);
        label_end_line = next;
    };
    // The label is everything between the opening `[` and the closing `]`.
    let label = String::from(&accumulated[1..close]);
    if normalize_label(&label).is_empty() {
        return None;
    }
    let label = label.as_str();
    // `source` holds the text after `]:`; it grows line by line below until a
    // destination (and optionally a title) can be parsed from it.
    let mut source = String::from(&accumulated[close + 2..]);
    let mut cursor = label_end_line;
    // Last successful parse that had a destination but no title, paired with
    // the line index at which parsing would resume if it ends up being used.
    let mut best_without_title = None;

    loop {
        if let Some(resource) = parse_definition_destination_title(&source) {
            if resource.title.is_some() {
                // Destination and title are both complete: the definition ends
                // on the current line.
                return Some((
                    Block::Definition(Definition {
                        meta: NodeMeta::new(Some(Span::new(
                            line.start,
                            lines[cursor].end_with_eol,
                        ))),
                        label: label.into(),
                        identifier: normalize_label(label),
                        destination: resource.destination,
                        destination_kind: resource.destination_kind,
                        title: resource.title,
                        title_kind: resource.title_kind,
                    }),
                    cursor + 1,
                ));
            }

            // Remember the title-less result, then keep looking only if the
            // next line could open a title.
            best_without_title = Some((resource, cursor + 1));
            let next = cursor + 1;
            if next >= lines.len()
                || lines[next].text.trim().is_empty()
                || !line_can_start_definition_title(lines[next].text)
            {
                break;
            }
        }

        let next = cursor + 1;
        if next >= lines.len() || lines[next].text.trim().is_empty() {
            break;
        }
        // A continuation line that itself begins a block-level construct (or a
        // setext underline) cannot be swallowed into the definition's pending,
        // not-yet-closed title: such a line interrupts the would-be paragraph, so
        // the definition fails and the lines are re-parsed as blocks (e.g.
        // `[a]: b '` then `***` is a paragraph + thematic break, not a title).
        if likely_block_start(lines[next].text, options)
            || setext_underline_depth(lines[next].text).is_some()
        {
            break;
        }
        source.push('\n');
        source.push_str(lines[next].text);
        cursor = next;
    }

    // No titled form was found: fall back to the title-less parse, if any. Its
    // recorded resume index discards lines that were only tentatively appended
    // while searching for a title.
    let (resource, next) = best_without_title?;
    let end = lines[next - 1].end_with_eol;
    Some((
        Block::Definition(Definition {
            meta: NodeMeta::new(Some(Span::new(line.start, end))),
            label: label.into(),
            identifier: normalize_label(label),
            destination: resource.destination,
            destination_kind: resource.destination_kind,
            title: resource.title,
            title_kind: resource.title_kind,
        }),
        next,
    ))
}
1817
1818fn trim_definition_start(input: &str, allow_subsequent_indent: bool) -> Option<&str> {
1819    if let Some(trimmed) = trim_up_to_three_spaces(input) {
1820        return Some(trimmed);
1821    }
1822    if allow_subsequent_indent {
1823        let (columns, bytes) = leading_indent(input);
1824        if columns == 4 {
1825            return Some(&input[bytes..]);
1826        }
1827    }
1828    None
1829}
1830
1831fn parse_footnote_definition(
1832    lines: &[Line<'_>],
1833    index: usize,
1834    options: &SyntaxOptions,
1835    definitions: &[String],
1836    diagnostics: &mut Vec<Diagnostic>,
1837) -> Option<(Block, usize)> {
1838    if !options.constructs.footnote_definition {
1839        return None;
1840    }
1841    let line = lines[index];
1842    let text = line.text.trim();
1843    if !text.starts_with("[^") {
1844        return None;
1845    }
1846    let close = find_footnote_definition_label_end(text)?;
1847    let label = &text[2..close];
1848    if !is_footnote_label(label) {
1849        return None;
1850    }
1851    let rest = text[close + 2..].trim();
1852    let mut content = String::new();
1853    push_line(&mut content, rest);
1854    let mut cursor = index + 1;
1855    let mut end = line.end_with_eol;
1856    let mut paragraph_open = paragraph_stays_open(rest, options);
1857
1858    while cursor < lines.len() {
1859        if lines[cursor].text.trim().is_empty() {
1860            let next = next_nonblank_line(lines, cursor + 1);
1861            if next >= lines.len() || !is_footnote_continuation(lines[next].text) {
1862                break;
1863            }
1864            push_line(&mut content, "");
1865            paragraph_open = false;
1866            end = lines[cursor].end_with_eol;
1867            cursor += 1;
1868            continue;
1869        }
1870
1871        let continuation = if let Some(continuation) = strip_indent_continuation(lines[cursor].text)
1872        {
1873            continuation
1874        } else if paragraph_open && !likely_block_start(lines[cursor].text, options) {
1875            trim_ascii_start(lines[cursor].text)
1876        } else {
1877            break;
1878        };
1879        paragraph_open = paragraph_stays_open(continuation, options);
1880        push_line(&mut content, continuation);
1881        end = lines[cursor].end_with_eol;
1882        cursor += 1;
1883    }
1884
1885    Some((
1886        Block::FootnoteDefinition(FootnoteDefinition {
1887            meta: NodeMeta::new(Some(Span::new(line.start, end))),
1888            label: label.into(),
1889            identifier: normalize_label(label),
1890            children: parse_blocks(
1891                &content,
1892                line.end.saturating_sub(rest.len()),
1893                false,
1894                options,
1895                definitions,
1896                diagnostics,
1897            ),
1898        }),
1899        cursor,
1900    ))
1901}
1902
1903fn is_footnote_continuation(input: &str) -> bool {
1904    strip_indent_continuation(input).is_some()
1905}
1906
1907fn parse_leaf_directive(
1908    line: Line<'_>,
1909    options: &SyntaxOptions,
1910    definitions: &[String],
1911    diagnostics: &mut Vec<Diagnostic>,
1912) -> Option<Block> {
1913    if !options.constructs.directive_leaf {
1914        return None;
1915    }
1916    let trimmed = line.text.trim_start();
1917    if trimmed.starts_with(":::") || !trimmed.starts_with("::") {
1918        return None;
1919    }
1920    let opener_base = line.start + (line.text.len() - trimmed.len()) + 2;
1921    let Some((name, label_source, attributes, _)) = parse_directive_opener(&trimmed[2..]) else {
1922        diagnostics.push(Diagnostic::new(
1923            DiagnosticSeverity::Error,
1924            DiagnosticCode::InvalidDirectiveName,
1925            Span::new(line.start, line.end),
1926            "leaf directive must have a valid name",
1927        ));
1928        return None;
1929    };
1930    let label = label_source
1931        .map(|source| {
1932            parse_inlines(
1933                source,
1934                opener_base + name.len() + 1,
1935                options,
1936                definitions,
1937                diagnostics,
1938            )
1939        })
1940        .unwrap_or_default();
1941    Some(Block::LeafDirective(LeafDirective {
1942        meta: NodeMeta::new(Some(Span::new(line.start, line.end))),
1943        name,
1944        label,
1945        attributes,
1946    }))
1947}
1948
1949fn parse_html_block(
1950    lines: &[Line<'_>],
1951    index: usize,
1952    options: &SyntaxOptions,
1953) -> Option<(Block, usize)> {
1954    if !options.constructs.html_block {
1955        return None;
1956    }
1957
1958    let trimmed = trim_up_to_three_spaces(lines[index].text)?;
1959    let kind = html_block_start(trimmed)?;
1960    let mut value = String::new();
1961    let mut cursor = index;
1962    match kind {
1963        HtmlBlockKind::RawTag => {
1964            // CommonMark §4.6 type-1: the block ends on a line containing ANY of
1965            // `</script>`, `</pre>`, `</style>`, `</textarea>` (case-insensitive),
1966            // regardless of which opened it.
1967            while cursor < lines.len() {
1968                push_line(&mut value, lines[cursor].text);
1969                if ["script", "pre", "style", "textarea"]
1970                    .iter()
1971                    .any(|tag| line_contains_raw_closing_tag(lines[cursor].text, tag))
1972                {
1973                    cursor += 1;
1974                    break;
1975                }
1976                cursor += 1;
1977            }
1978        }
1979        HtmlBlockKind::BlockTag => {
1980            while cursor < lines.len() && !lines[cursor].text.trim().is_empty() {
1981                push_line(&mut value, lines[cursor].text);
1982                cursor += 1;
1983            }
1984        }
1985        HtmlBlockKind::Until(end) => {
1986            while cursor < lines.len() {
1987                push_line(&mut value, lines[cursor].text);
1988                if lines[cursor].text.contains(end) {
1989                    cursor += 1;
1990                    break;
1991                }
1992                cursor += 1;
1993            }
1994        }
1995        HtmlBlockKind::UntilBlank => {
1996            while cursor < lines.len() && !lines[cursor].text.trim().is_empty() {
1997                push_line(&mut value, lines[cursor].text);
1998                cursor += 1;
1999            }
2000        }
2001    }
2002    Some((
2003        Block::HtmlBlock(HtmlBlock {
2004            meta: NodeMeta::new(Some(Span::new(
2005                lines[index].start,
2006                lines[cursor - 1].end_with_eol,
2007            ))),
2008            value,
2009        }),
2010        cursor,
2011    ))
2012}
2013
2014fn html_block_start(input: &str) -> Option<HtmlBlockKind> {
2015    let trimmed = input.trim_end();
2016    if !trimmed.starts_with('<') {
2017        return None;
2018    }
2019
2020    if raw_html_tag_start(trimmed) {
2021        return Some(HtmlBlockKind::RawTag);
2022    }
2023    if trimmed.starts_with("<!--") {
2024        return Some(HtmlBlockKind::Until("-->"));
2025    }
2026    if trimmed.starts_with("<?") {
2027        return Some(HtmlBlockKind::Until("?>"));
2028    }
2029    if is_declaration_start(trimmed) {
2030        return Some(HtmlBlockKind::Until(">"));
2031    }
2032    if trimmed.starts_with("<![CDATA[") {
2033        return Some(HtmlBlockKind::Until("]]>"));
2034    }
2035
2036    if html_block_tag_start(trimmed) {
2037        return Some(HtmlBlockKind::BlockTag);
2038    }
2039
2040    let Some((end, _tag_name)) = parse_html_tag(trimmed, 0) else {
2041        return None;
2042    };
2043    let rest = trimmed[end..].trim();
2044    if rest.is_empty() {
2045        Some(HtmlBlockKind::UntilBlank)
2046    } else {
2047        None
2048    }
2049}
2050
2051pub(crate) fn line_starts_html_block(input: &str) -> bool {
2052    trim_up_to_three_spaces(input)
2053        .and_then(html_block_start)
2054        .is_some()
2055}
2056
2057fn raw_html_tag_start(input: &str) -> bool {
2058    for tag in ["script", "pre", "style", "textarea"] {
2059        if html_raw_open_tag_prefix(input, tag) {
2060            return true;
2061        }
2062    }
2063    false
2064}
2065
/// Reports whether `input` begins with an opening tag named `tag`, compared
/// ASCII case-insensitively.
///
/// After `<tag` the input must either end, continue with a space, tab, line
/// ending byte, or `>`, or consist of exactly `/>` and nothing more. Closing
/// tags (`</tag`) and longer names sharing the prefix (`<prefix`) are rejected.
fn html_raw_open_tag_prefix(input: &str, tag: &str) -> bool {
    let Some(after_open) = input.strip_prefix('<') else {
        return false;
    };
    let bytes = after_open.as_bytes();
    if bytes.first() == Some(&b'/') || bytes.len() < tag.len() {
        return false;
    }

    let (name, tail) = bytes.split_at(tag.len());
    if !name.eq_ignore_ascii_case(tag.as_bytes()) {
        return false;
    }

    match tail {
        [] => true,
        [b' ' | b'\t' | b'\n' | b'\r' | b'>', ..] => true,
        // A self-closing form is accepted only when it ends the input.
        [b'/', b'>'] => true,
        _ => false,
    }
}
2090
/// Reports whether `input` contains, at any position, a closing tag for `tag`
/// (`</tag>`), compared ASCII case-insensitively. ASCII whitespace is allowed
/// between the tag name and the closing `>`.
fn line_contains_raw_closing_tag(input: &str, tag: &str) -> bool {
    let haystack = input.as_bytes();
    let name = tag.as_bytes();
    // Length of the `</` marker plus the tag name.
    let prefix_len = 2 + name.len();

    (0..haystack.len()).any(|start| {
        let Some(candidate) = haystack.get(start..start + prefix_len) else {
            return false;
        };
        if !candidate.starts_with(b"</") || !candidate[2..].eq_ignore_ascii_case(name) {
            return false;
        }
        // The first byte after the name that is not whitespace must be `>`.
        haystack[start + prefix_len..]
            .iter()
            .find(|byte| !byte.is_ascii_whitespace())
            == Some(&b'>')
    })
}
2127
2128fn html_block_tag_start(input: &str) -> bool {
2129    let bytes = input.as_bytes();
2130    if bytes.first() != Some(&b'<') {
2131        return false;
2132    }
2133
2134    let mut cursor = 1;
2135    if bytes.get(cursor) == Some(&b'/') {
2136        cursor += 1;
2137    }
2138
2139    let name_start = cursor;
2140    if !bytes
2141        .get(cursor)
2142        .is_some_and(|byte| byte.is_ascii_alphabetic())
2143    {
2144        return false;
2145    }
2146    cursor += 1;
2147    while bytes.get(cursor).is_some_and(|byte| html_name_byte(*byte)) {
2148        cursor += 1;
2149    }
2150
2151    let name = &input[name_start..cursor];
2152    if !html_block_tag(name) {
2153        return false;
2154    }
2155
2156    match bytes.get(cursor) {
2157        None | Some(b' ' | b'\t' | b'\n' | b'\r' | b'>') => true,
2158        Some(b'/') if bytes.get(cursor + 1) == Some(&b'>') => true,
2159        _ => false,
2160    }
2161}
2162
/// Reports whether `tag` names one of the block-level HTML elements listed
/// below, compared ASCII case-insensitively.
fn html_block_tag(tag: &str) -> bool {
    // Every entry is lowercase ASCII, so a case-insensitive comparison against
    // the table is the same as lowercasing `tag` and matching exactly.
    const BLOCK_TAGS: &[&str] = &[
        "address",
        "article",
        "aside",
        "base",
        "basefont",
        "blockquote",
        "body",
        "caption",
        "center",
        "col",
        "colgroup",
        "dd",
        "details",
        "dialog",
        "dir",
        "div",
        "dl",
        "dt",
        "fieldset",
        "figcaption",
        "figure",
        "footer",
        "form",
        "frame",
        "frameset",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "head",
        "header",
        "hr",
        "html",
        "iframe",
        "legend",
        "li",
        "link",
        "main",
        "menu",
        "menuitem",
        "nav",
        "noframes",
        "ol",
        "optgroup",
        "option",
        "p",
        "param",
        "search",
        "section",
        "summary",
        "table",
        "tbody",
        "td",
        "tfoot",
        "th",
        "thead",
        "title",
        "tr",
        "track",
        "ul",
    ];

    BLOCK_TAGS
        .iter()
        .any(|&known| tag.eq_ignore_ascii_case(known))
}
2230
/// Reports whether `input` opens an HTML declaration: the literal `<!`
/// immediately followed by an ASCII letter (as in `<!DOCTYPE`).
fn is_declaration_start(input: &str) -> bool {
    matches!(
        input.as_bytes(),
        [b'<', b'!', third, ..] if third.is_ascii_alphabetic()
    )
}
2237
2238fn parse_mdx_flow(
2239    lines: &[Line<'_>],
2240    index: usize,
2241    options: &SyntaxOptions,
2242    diagnostics: &mut Vec<Diagnostic>,
2243) -> Option<(Block, usize)> {
2244    if options.constructs.mdx_esm {
2245        if let Some((block, next)) = parse_mdx_esm_flow(lines, index, diagnostics) {
2246            return Some((block, next));
2247        }
2248    }
2249
2250    let line = lines[index];
2251    let trimmed = line.text.trim_start();
2252    if options.constructs.mdx_expression_block && trimmed.starts_with('{') {
2253        let open_byte = line.text.len() - trimmed.len();
2254        if let Some((close_line, close_byte)) = find_mdx_expression_close(lines, index, open_byte) {
2255            return Some((
2256                Block::MdxExpression(MdxExpression {
2257                    meta: NodeMeta::new(Some(Span::new(line.start, lines[close_line].end))),
2258                    value: collect_mdx_expression_value(
2259                        lines, index, open_byte, close_line, close_byte,
2260                    ),
2261                }),
2262                close_line + 1,
2263            ));
2264        }
2265        diagnostics.push(Diagnostic::new(
2266            DiagnosticSeverity::Error,
2267            DiagnosticCode::InvalidMdx,
2268            Span::new(line.start + open_byte, lines.last()?.end_with_eol),
2269            "MDX expression block is missing a closing brace",
2270        ));
2271    }
2272    if options.constructs.mdx_jsx_block && trimmed.starts_with('<') {
2273        if let Some(close_line) = find_mdx_jsx_close(lines, index) {
2274            return Some((
2275                Block::MdxJsx(MdxJsx {
2276                    meta: NodeMeta::new(Some(Span::new(line.start, lines[close_line].end))),
2277                    value: collect_line_range(lines, index, close_line),
2278                }),
2279                close_line + 1,
2280            ));
2281        }
2282        let start_byte = line.text.len() - trimmed.len();
2283        if let Some(root) = mdx_jsx_tag_start(line.text, start_byte) {
2284            if !root.closing {
2285                if let Some((_tag_end_line, _tag_end_byte, self_closing)) =
2286                    find_mdx_jsx_tag_end(lines, index, start_byte)
2287                {
2288                    if !self_closing {
2289                        diagnostics.push(Diagnostic::new(
2290                            DiagnosticSeverity::Error,
2291                            DiagnosticCode::InvalidMdx,
2292                            Span::new(line.start + start_byte, lines.last()?.end_with_eol),
2293                            "MDX JSX block is missing a closing tag",
2294                        ));
2295                    }
2296                }
2297            }
2298        }
2299    }
2300    None
2301}
2302
/// Scanner state carried from line to line while reading an MDX ESM block.
/// Updated by `update_mdx_esm_state` and inspected by
/// `state_has_open_mdx_esm_construct`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
struct MdxEsmState {
    /// Number of `{` seen outside quotes/comments not yet balanced by `}`.
    brace_depth: usize,
    /// Number of `[` seen outside quotes/comments not yet balanced by `]`.
    bracket_depth: usize,
    /// Number of `(` seen outside quotes/comments not yet balanced by `)`.
    paren_depth: usize,
    /// True while inside a `/* … */` comment that has not been closed.
    block_comment: bool,
    /// Delimiter byte (`'`, `"`, or a backtick) of the quote currently open,
    /// if any.
    quote: Option<u8>,
    /// True when the previous byte inside a quote was a backslash, so the
    /// next byte is taken literally.
    escaped: bool,
}
2312
/// Lexical context tracked while searching for the brace that closes an MDX
/// expression, so that braces inside strings and comments are not counted.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum MdxBraceState {
    /// Ordinary code: quotes, comment openers, and braces are significant.
    Normal,
    /// Inside a `'…'` string.
    SingleQuoted,
    /// Inside a `"…"` string.
    DoubleQuoted,
    /// Inside a backtick-delimited template literal.
    Template,
    /// Inside a `//` comment.
    LineComment,
    /// Inside a `/* … */` comment.
    BlockComment,
}
2322
/// Identity of an MDX JSX tag.
// NOTE(review): the constructor is not visible in this part of the file;
// the variant descriptions below are inferred from the names — confirm
// against `mdx_jsx_tag_start`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum MdxJsxTag<'a> {
    /// Presumably a nameless fragment tag (`<>` / `</>`).
    Fragment,
    /// A tag carrying a name, borrowed as a string slice.
    Named(&'a str),
}
2328
/// Description of the start of an MDX JSX tag, as returned by
/// `mdx_jsx_tag_start`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct MdxJsxTagStart<'a> {
    /// Which tag this is (fragment or named).
    tag: MdxJsxTag<'a>,
    /// Presumably true for a closing tag (`</…`); callers skip such tags when
    /// looking for a block opener — confirm against `mdx_jsx_tag_start`.
    closing: bool,
}
2334
2335fn parse_mdx_esm_flow(
2336    lines: &[Line<'_>],
2337    index: usize,
2338    diagnostics: &mut Vec<Diagnostic>,
2339) -> Option<(Block, usize)> {
2340    if !is_mdx_esm_start(lines[index].text) {
2341        return None;
2342    }
2343
2344    let mut value = String::new();
2345    let mut state = MdxEsmState::default();
2346    let mut cursor = index;
2347    while cursor < lines.len() {
2348        let line = lines[cursor].text;
2349        if cursor > index && !is_mdx_esm_continuation(line, &state) {
2350            break;
2351        }
2352        if cursor > index {
2353            value.push('\n');
2354        }
2355        value.push_str(line);
2356        update_mdx_esm_state(line, &mut state);
2357        cursor += 1;
2358    }
2359    if cursor >= lines.len() && state_has_open_mdx_esm_construct(&state) {
2360        diagnostics.push(Diagnostic::new(
2361            DiagnosticSeverity::Error,
2362            DiagnosticCode::InvalidMdx,
2363            Span::new(lines[index].start, lines[cursor - 1].end_with_eol),
2364            "MDX ESM block is missing a closing delimiter",
2365        ));
2366    }
2367
2368    Some((
2369        Block::MdxEsm(MdxEsm {
2370            meta: NodeMeta::new(Some(Span::new(lines[index].start, lines[cursor - 1].end))),
2371            value,
2372        }),
2373        cursor,
2374    ))
2375}
2376
/// Reports whether `line` begins an ESM statement: the keyword `import` or
/// `export` at column zero, immediately followed by a single space.
fn is_mdx_esm_start(line: &str) -> bool {
    // Splitting at the first space isolates the leading word; it matches only
    // when the line literally starts with `import ` or `export `.
    matches!(line.split_once(' '), Some(("import" | "export", _)))
}
2380
2381fn is_mdx_esm_continuation(line: &str, state: &MdxEsmState) -> bool {
2382    if state_has_open_mdx_esm_construct(state) {
2383        return true;
2384    }
2385    let trimmed = line.trim_start();
2386    if trimmed.is_empty() {
2387        return false;
2388    }
2389    is_mdx_esm_start(line) || trimmed.starts_with("//") || trimmed.starts_with("/*")
2390}
2391
2392fn state_has_open_mdx_esm_construct(state: &MdxEsmState) -> bool {
2393    state.brace_depth > 0
2394        || state.bracket_depth > 0
2395        || state.paren_depth > 0
2396        || state.block_comment
2397        || state.quote == Some(b'`')
2398}
2399
2400fn update_mdx_esm_state(line: &str, state: &mut MdxEsmState) {
2401    let bytes = line.as_bytes();
2402    let mut index = 0;
2403    while index < bytes.len() {
2404        let byte = bytes[index];
2405        if state.block_comment {
2406            if byte == b'*' && bytes.get(index + 1) == Some(&b'/') {
2407                state.block_comment = false;
2408                index += 1;
2409            }
2410            index += 1;
2411            continue;
2412        }
2413
2414        if let Some(delimiter) = state.quote {
2415            if state.escaped {
2416                state.escaped = false;
2417            } else if byte == b'\\' {
2418                state.escaped = true;
2419            } else if byte == delimiter {
2420                state.quote = None;
2421            }
2422            index += 1;
2423            continue;
2424        }
2425
2426        match byte {
2427            b'\'' | b'"' | b'`' => state.quote = Some(byte),
2428            b'/' if bytes.get(index + 1) == Some(&b'/') => break,
2429            b'/' if bytes.get(index + 1) == Some(&b'*') => {
2430                state.block_comment = true;
2431                index += 1;
2432            }
2433            b'{' => state.brace_depth += 1,
2434            b'}' => state.brace_depth = state.brace_depth.saturating_sub(1),
2435            b'[' => state.bracket_depth += 1,
2436            b']' => state.bracket_depth = state.bracket_depth.saturating_sub(1),
2437            b'(' => state.paren_depth += 1,
2438            b')' => state.paren_depth = state.paren_depth.saturating_sub(1),
2439            _ => {}
2440        }
2441        index += 1;
2442    }
2443}
2444
/// Searches for the `}` that balances the `{` at byte `open_byte` of
/// `lines[index]`, scanning across as many lines as needed.
///
/// Braces inside quoted strings, template literals, and comments are ignored.
/// Returns `(line index, byte offset)` of the closing brace, but only when
/// nothing except whitespace follows it on its line. Returns `None` when the
/// closing brace is followed by other text or is never found.
fn find_mdx_expression_close(
    lines: &[Line<'_>],
    index: usize,
    open_byte: usize,
) -> Option<(usize, usize)> {
    let mut depth = 0usize;
    let mut state = MdxBraceState::Normal;
    let mut escaped = false;
    let mut cursor = index;

    while cursor < lines.len() {
        let bytes = lines[cursor].text.as_bytes();
        // Only the first line starts mid-line, at the opening brace itself.
        let mut byte_index = if cursor == index { open_byte } else { 0 };
        while byte_index < bytes.len() {
            let byte = bytes[byte_index];
            match state {
                MdxBraceState::Normal => match byte {
                    b'\'' => state = MdxBraceState::SingleQuoted,
                    b'"' => state = MdxBraceState::DoubleQuoted,
                    b'`' => state = MdxBraceState::Template,
                    b'/' if bytes.get(byte_index + 1) == Some(&b'/') => {
                        // The rest of this line is a comment; stop scanning it.
                        state = MdxBraceState::LineComment;
                        break;
                    }
                    b'/' if bytes.get(byte_index + 1) == Some(&b'*') => {
                        state = MdxBraceState::BlockComment;
                        // Skip the `*` so `/*/` is not read as open-then-close.
                        byte_index += 1;
                    }
                    b'{' => depth += 1,
                    b'}' => {
                        depth = depth.checked_sub(1)?;
                        if depth == 0 {
                            // Accept the close only if the remainder of the
                            // line is blank.
                            return lines[cursor].text[byte_index + 1..]
                                .trim()
                                .is_empty()
                                .then_some((cursor, byte_index));
                        }
                    }
                    _ => {}
                },
                MdxBraceState::SingleQuoted => {
                    update_mdx_quote_state(byte, b'\'', &mut state, &mut escaped);
                }
                MdxBraceState::DoubleQuoted => {
                    update_mdx_quote_state(byte, b'"', &mut state, &mut escaped);
                }
                MdxBraceState::Template => {
                    update_mdx_quote_state(byte, b'`', &mut state, &mut escaped);
                }
                MdxBraceState::LineComment => break,
                MdxBraceState::BlockComment => {
                    if byte == b'*' && bytes.get(byte_index + 1) == Some(&b'/') {
                        state = MdxBraceState::Normal;
                        byte_index += 1;
                    }
                }
            }
            byte_index += 1;
        }
        // A line comment ends with its line; every other state (quotes, block
        // comments) carries over to the next line unchanged.
        if state == MdxBraceState::LineComment {
            state = MdxBraceState::Normal;
        }
        cursor += 1;
    }

    None
}
2512
2513fn update_mdx_quote_state(byte: u8, delimiter: u8, state: &mut MdxBraceState, escaped: &mut bool) {
2514    if *escaped {
2515        *escaped = false;
2516        return;
2517    }
2518    if byte == b'\\' {
2519        *escaped = true;
2520        return;
2521    }
2522    if byte == delimiter {
2523        *state = MdxBraceState::Normal;
2524    }
2525}
2526
2527fn find_mdx_expression_inline_close(input: &str, open_byte: usize) -> Option<usize> {
2528    let bytes = input.as_bytes();
2529    if bytes.get(open_byte) != Some(&b'{') {
2530        return None;
2531    }
2532
2533    let mut depth = 0usize;
2534    let mut state = MdxBraceState::Normal;
2535    let mut escaped = false;
2536    let mut cursor = open_byte;
2537    while cursor < bytes.len() {
2538        let byte = bytes[cursor];
2539        match state {
2540            MdxBraceState::Normal => match byte {
2541                b'\'' => state = MdxBraceState::SingleQuoted,
2542                b'"' => state = MdxBraceState::DoubleQuoted,
2543                b'`' => state = MdxBraceState::Template,
2544                b'/' if bytes.get(cursor + 1) == Some(&b'/') => {
2545                    state = MdxBraceState::LineComment;
2546                    cursor += 1;
2547                }
2548                b'/' if bytes.get(cursor + 1) == Some(&b'*') => {
2549                    state = MdxBraceState::BlockComment;
2550                    cursor += 1;
2551                }
2552                b'{' => depth += 1,
2553                b'}' => {
2554                    depth = depth.checked_sub(1)?;
2555                    if depth == 0 {
2556                        return Some(cursor);
2557                    }
2558                }
2559                _ => {}
2560            },
2561            MdxBraceState::SingleQuoted => {
2562                update_mdx_quote_state(byte, b'\'', &mut state, &mut escaped);
2563            }
2564            MdxBraceState::DoubleQuoted => {
2565                update_mdx_quote_state(byte, b'"', &mut state, &mut escaped);
2566            }
2567            MdxBraceState::Template => {
2568                update_mdx_quote_state(byte, b'`', &mut state, &mut escaped);
2569            }
2570            MdxBraceState::LineComment => {
2571                if byte == b'\n' {
2572                    state = MdxBraceState::Normal;
2573                }
2574            }
2575            MdxBraceState::BlockComment => {
2576                if byte == b'*' && bytes.get(cursor + 1) == Some(&b'/') {
2577                    state = MdxBraceState::Normal;
2578                    cursor += 1;
2579                }
2580            }
2581        }
2582        cursor += 1;
2583    }
2584    None
2585}
2586
2587fn collect_mdx_expression_value(
2588    lines: &[Line<'_>],
2589    start_line: usize,
2590    open_byte: usize,
2591    close_line: usize,
2592    close_byte: usize,
2593) -> String {
2594    let mut value = String::new();
2595    let mut cursor = start_line;
2596    while cursor <= close_line {
2597        if cursor > start_line {
2598            value.push('\n');
2599        }
2600        let line = lines[cursor].text;
2601        let segment = if cursor == start_line && cursor == close_line {
2602            &line[open_byte + 1..close_byte]
2603        } else if cursor == start_line {
2604            &line[open_byte + 1..]
2605        } else if cursor == close_line {
2606            &line[..close_byte]
2607        } else {
2608            line
2609        };
2610        value.push_str(segment);
2611        cursor += 1;
2612    }
2613    value
2614}
2615
2616fn find_mdx_jsx_close<'a>(lines: &'a [Line<'a>], index: usize) -> Option<usize> {
2617    let line = lines[index];
2618    let trimmed = line.text.trim_start();
2619    let start_byte = line.text.len() - trimmed.len();
2620    let root = mdx_jsx_tag_start(line.text, start_byte)?;
2621    if root.closing {
2622        return None;
2623    }
2624
2625    let (mut cursor_line, mut cursor_byte, self_closing) =
2626        find_mdx_jsx_tag_end(lines, index, start_byte)?;
2627    if self_closing {
2628        return Some(cursor_line);
2629    }
2630
2631    let mut depth = 1usize;
2632    cursor_byte += 1;
2633    'scan: while cursor_line < lines.len() {
2634        let line = lines[cursor_line].text;
2635        while cursor_byte < line.len() {
2636            let Some(relative_start) = line[cursor_byte..].find('<') else {
2637                break;
2638            };
2639            let tag_start_byte = cursor_byte + relative_start;
2640            let Some(candidate) = mdx_jsx_tag_start(line, tag_start_byte) else {
2641                cursor_byte = tag_start_byte + 1;
2642                continue;
2643            };
2644            let Some((tag_end_line, tag_end_byte, candidate_self_closing)) =
2645                find_mdx_jsx_tag_end(lines, cursor_line, tag_start_byte)
2646            else {
2647                return None;
2648            };
2649
2650            if mdx_jsx_tag_matches(root.tag, candidate.tag) {
2651                if candidate.closing {
2652                    depth = depth.saturating_sub(1);
2653                    if depth == 0 {
2654                        return Some(tag_end_line);
2655                    }
2656                } else if !candidate_self_closing {
2657                    depth += 1;
2658                }
2659            }
2660
2661            cursor_byte = tag_end_byte + 1;
2662            if tag_end_line != cursor_line {
2663                cursor_line = tag_end_line;
2664                continue 'scan;
2665            }
2666        }
2667        cursor_line += 1;
2668        cursor_byte = 0;
2669    }
2670    None
2671}
2672
2673fn parse_mdx_jsx_inline(input: &str, index: usize) -> Option<(usize, String)> {
2674    let root = mdx_jsx_tag_start(input, index)?;
2675    if root.closing {
2676        return None;
2677    }
2678
2679    let (mut cursor, self_closing) = find_mdx_jsx_tag_end_in_text(input, index)?;
2680    if self_closing {
2681        let end = cursor + 1;
2682        return Some((end, input[index..end].into()));
2683    }
2684
2685    let mut depth = 1usize;
2686    cursor += 1;
2687    while cursor < input.len() {
2688        let Some(relative_start) = input[cursor..].find('<') else {
2689            return None;
2690        };
2691        let tag_start_byte = cursor + relative_start;
2692        let Some(candidate) = mdx_jsx_tag_start(input, tag_start_byte) else {
2693            cursor = tag_start_byte + 1;
2694            continue;
2695        };
2696        let Some((tag_end, candidate_self_closing)) =
2697            find_mdx_jsx_tag_end_in_text(input, tag_start_byte)
2698        else {
2699            return None;
2700        };
2701
2702        if mdx_jsx_tag_matches(root.tag, candidate.tag) {
2703            if candidate.closing {
2704                depth = depth.saturating_sub(1);
2705                if depth == 0 {
2706                    let end = tag_end + 1;
2707                    return Some((end, input[index..end].into()));
2708                }
2709            } else if !candidate_self_closing {
2710                depth += 1;
2711            }
2712        }
2713        cursor = tag_end + 1;
2714    }
2715    None
2716}
2717
2718fn mdx_jsx_tag_start(input: &str, start: usize) -> Option<MdxJsxTagStart<'_>> {
2719    let bytes = input.as_bytes();
2720    if bytes.get(start) != Some(&b'<') {
2721        return None;
2722    }
2723
2724    match bytes.get(start + 1) {
2725        Some(b'>') => {
2726            return Some(MdxJsxTagStart {
2727                tag: MdxJsxTag::Fragment,
2728                closing: false,
2729            });
2730        }
2731        Some(b'/') if bytes.get(start + 2) == Some(&b'>') => {
2732            return Some(MdxJsxTagStart {
2733                tag: MdxJsxTag::Fragment,
2734                closing: true,
2735            });
2736        }
2737        Some(b'!' | b'?') | None => return None,
2738        _ => {}
2739    }
2740
2741    let closing = bytes.get(start + 1) == Some(&b'/');
2742    let name_start = start + if closing { 2 } else { 1 };
2743    if !bytes
2744        .get(name_start)
2745        .is_some_and(|byte| is_mdx_jsx_name_start_byte(*byte))
2746    {
2747        return None;
2748    }
2749
2750    let mut name_end = name_start + 1;
2751    while bytes
2752        .get(name_end)
2753        .is_some_and(|byte| is_mdx_jsx_name_byte(*byte))
2754    {
2755        name_end += 1;
2756    }
2757    if name_end == name_start {
2758        return None;
2759    }
2760    if bytes
2761        .get(name_end)
2762        .is_some_and(|byte| !is_mdx_jsx_name_delimiter(*byte))
2763    {
2764        return None;
2765    }
2766    Some(MdxJsxTagStart {
2767        tag: MdxJsxTag::Named(&input[name_start..name_end]),
2768        closing,
2769    })
2770}
2771
2772fn mdx_jsx_tag_matches(left: MdxJsxTag<'_>, right: MdxJsxTag<'_>) -> bool {
2773    match (left, right) {
2774        (MdxJsxTag::Fragment, MdxJsxTag::Fragment) => true,
2775        (MdxJsxTag::Named(left), MdxJsxTag::Named(right)) => left == right,
2776        _ => false,
2777    }
2778}
2779
2780fn find_mdx_jsx_tag_end(
2781    lines: &[Line<'_>],
2782    start_line: usize,
2783    start_byte: usize,
2784) -> Option<(usize, usize, bool)> {
2785    let mut line_index = start_line;
2786    let mut byte_index = start_byte + 1;
2787    let mut quote = None;
2788    let mut escaped = false;
2789    let mut expression_depth = 0usize;
2790    let mut expression_state = MdxBraceState::Normal;
2791    let mut expression_escaped = false;
2792
2793    while line_index < lines.len() {
2794        let bytes = lines[line_index].text.as_bytes();
2795        while byte_index < bytes.len() {
2796            let byte = bytes[byte_index];
2797            if expression_depth > 0 {
2798                if update_mdx_jsx_expression_state(
2799                    byte,
2800                    bytes.get(byte_index + 1).copied(),
2801                    &mut expression_depth,
2802                    &mut expression_state,
2803                    &mut expression_escaped,
2804                ) {
2805                    byte_index += 1;
2806                }
2807                byte_index += 1;
2808                continue;
2809            }
2810
2811            if let Some(delimiter) = quote {
2812                if escaped {
2813                    escaped = false;
2814                } else if byte == b'\\' {
2815                    escaped = true;
2816                } else if byte == delimiter {
2817                    quote = None;
2818                }
2819                byte_index += 1;
2820                continue;
2821            }
2822
2823            match byte {
2824                b'\'' | b'"' => quote = Some(byte),
2825                b'{' => {
2826                    expression_depth = 1;
2827                    expression_state = MdxBraceState::Normal;
2828                    expression_escaped = false;
2829                }
2830                b'>' if expression_depth == 0 => {
2831                    let self_closing =
2832                        previous_nonspace_before(lines, line_index, byte_index) == Some(b'/');
2833                    return Some((line_index, byte_index, self_closing));
2834                }
2835                _ => {}
2836            }
2837            byte_index += 1;
2838        }
2839        if expression_state == MdxBraceState::LineComment {
2840            expression_state = MdxBraceState::Normal;
2841        }
2842        line_index += 1;
2843        byte_index = 0;
2844    }
2845    None
2846}
2847
2848fn previous_nonspace_before(
2849    lines: &[Line<'_>],
2850    line_index: usize,
2851    byte_index: usize,
2852) -> Option<u8> {
2853    let mut cursor_line = line_index;
2854    let mut cursor_byte = byte_index;
2855
2856    loop {
2857        if let Some(byte) = lines[cursor_line].text.as_bytes()[..cursor_byte]
2858            .iter()
2859            .rev()
2860            .copied()
2861            .find(|byte| !byte.is_ascii_whitespace())
2862        {
2863            return Some(byte);
2864        }
2865        if cursor_line == 0 {
2866            return None;
2867        }
2868        cursor_line -= 1;
2869        cursor_byte = lines[cursor_line].text.len();
2870    }
2871}
2872
2873fn find_mdx_jsx_tag_end_in_text(input: &str, start_byte: usize) -> Option<(usize, bool)> {
2874    let bytes = input.as_bytes();
2875    let mut byte_index = start_byte + 1;
2876    let mut quote = None;
2877    let mut escaped = false;
2878    let mut expression_depth = 0usize;
2879    let mut expression_state = MdxBraceState::Normal;
2880    let mut expression_escaped = false;
2881
2882    while byte_index < bytes.len() {
2883        let byte = bytes[byte_index];
2884        if expression_depth > 0 {
2885            if update_mdx_jsx_expression_state(
2886                byte,
2887                bytes.get(byte_index + 1).copied(),
2888                &mut expression_depth,
2889                &mut expression_state,
2890                &mut expression_escaped,
2891            ) {
2892                byte_index += 1;
2893            }
2894            byte_index += 1;
2895            continue;
2896        }
2897
2898        if let Some(delimiter) = quote {
2899            if escaped {
2900                escaped = false;
2901            } else if byte == b'\\' {
2902                escaped = true;
2903            } else if byte == delimiter {
2904                quote = None;
2905            }
2906            byte_index += 1;
2907            continue;
2908        }
2909
2910        match byte {
2911            b'\'' | b'"' => quote = Some(byte),
2912            b'{' => {
2913                expression_depth = 1;
2914                expression_state = MdxBraceState::Normal;
2915                expression_escaped = false;
2916            }
2917            b'>' if expression_depth == 0 => {
2918                let self_closing = previous_nonspace_before_text(input, byte_index) == Some(b'/');
2919                return Some((byte_index, self_closing));
2920            }
2921            _ => {}
2922        }
2923        byte_index += 1;
2924    }
2925    None
2926}
2927
/// Returns the last byte before `byte_index` in `input` that is not ASCII
/// whitespace, or `None` when everything before it is whitespace.
fn previous_nonspace_before_text(input: &str, byte_index: usize) -> Option<u8> {
    let prefix = &input.as_bytes()[..byte_index];
    prefix
        .iter()
        .rfind(|byte| !byte.is_ascii_whitespace())
        .copied()
}
2935
2936fn update_mdx_jsx_expression_state(
2937    byte: u8,
2938    next: Option<u8>,
2939    depth: &mut usize,
2940    state: &mut MdxBraceState,
2941    escaped: &mut bool,
2942) -> bool {
2943    match *state {
2944        MdxBraceState::Normal => match byte {
2945            b'\'' => *state = MdxBraceState::SingleQuoted,
2946            b'"' => *state = MdxBraceState::DoubleQuoted,
2947            b'`' => *state = MdxBraceState::Template,
2948            b'/' if next == Some(b'/') => {
2949                *state = MdxBraceState::LineComment;
2950                return true;
2951            }
2952            b'/' if next == Some(b'*') => {
2953                *state = MdxBraceState::BlockComment;
2954                return true;
2955            }
2956            b'{' => *depth += 1,
2957            b'}' => {
2958                *depth = (*depth).saturating_sub(1);
2959                if *depth == 0 {
2960                    *state = MdxBraceState::Normal;
2961                    *escaped = false;
2962                }
2963            }
2964            _ => {}
2965        },
2966        MdxBraceState::SingleQuoted => {
2967            update_mdx_quote_state(byte, b'\'', state, escaped);
2968        }
2969        MdxBraceState::DoubleQuoted => {
2970            update_mdx_quote_state(byte, b'"', state, escaped);
2971        }
2972        MdxBraceState::Template => {
2973            update_mdx_quote_state(byte, b'`', state, escaped);
2974        }
2975        MdxBraceState::LineComment => {
2976            if byte == b'\n' {
2977                *state = MdxBraceState::Normal;
2978            }
2979        }
2980        MdxBraceState::BlockComment => {
2981            if byte == b'*' && next == Some(b'/') {
2982                *state = MdxBraceState::Normal;
2983                return true;
2984            }
2985        }
2986    }
2987    false
2988}
2989
/// True for bytes that may begin a JSX tag name: ASCII letters, `_` and `$`.
fn is_mdx_jsx_name_start_byte(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'$')
}
2993
/// True for bytes that may continue a JSX tag name: ASCII letters and digits
/// plus `.`, `:`, `_`, `-` and `$`.
fn is_mdx_jsx_name_byte(byte: u8) -> bool {
    matches!(
        byte,
        b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'.' | b':' | b'_' | b'-' | b'$'
    )
}
2997
/// True for bytes that may directly follow a JSX tag name: ASCII whitespace,
/// `/`, `>`, `{` and `}`.
fn is_mdx_jsx_name_delimiter(byte: u8) -> bool {
    matches!(
        byte,
        b' ' | b'\t' | b'\n' | b'\x0C' | b'\r' | b'/' | b'>' | b'{' | b'}'
    )
}
3001
3002fn collect_line_range(lines: &[Line<'_>], start: usize, end: usize) -> String {
3003    let mut value = String::new();
3004    let mut cursor = start;
3005    while cursor <= end {
3006        if cursor > start {
3007            value.push('\n');
3008        }
3009        value.push_str(lines[cursor].text);
3010        cursor += 1;
3011    }
3012    value
3013}
3014
3015fn parse_indented_code(
3016    lines: &[Line<'_>],
3017    index: usize,
3018    options: &SyntaxOptions,
3019) -> Option<(Block, usize)> {
3020    if !options.constructs.indented_code || strip_indented_code_prefix(lines[index].text).is_none()
3021    {
3022        return None;
3023    }
3024    let mut value = String::new();
3025    let mut cursor = index;
3026    // Track the last line that carried real content: leading and trailing blank
3027    // lines are not part of an indented code block, only interior ones are.
3028    let mut content_end = index;
3029    let mut content_end_len = 0usize;
3030    while cursor < lines.len() {
3031        if let Some(text) = strip_indented_code_prefix(lines[cursor].text) {
3032            ensure_line_separator(&mut value);
3033            value.push_str(text);
3034            value.push_str(lines[cursor].eol);
3035            if !text.trim().is_empty() {
3036                content_end = cursor;
3037                content_end_len = value.len();
3038            }
3039            cursor += 1;
3040            continue;
3041        }
3042
3043        if !lines[cursor].text.trim().is_empty() {
3044            break;
3045        }
3046        ensure_line_separator(&mut value);
3047        value.push_str(lines[cursor].eol);
3048        cursor += 1;
3049    }
3050    // Drop trailing blank lines accumulated past the last real content line.
3051    value.truncate(content_end_len);
3052    Some((
3053        Block::CodeBlock(CodeBlock {
3054            meta: NodeMeta::new(Some(Span::new(
3055                lines[index].start,
3056                lines[content_end].end_with_eol,
3057            ))),
3058            kind: CodeBlockKind::Indented,
3059            info: None,
3060            value,
3061        }),
3062        cursor,
3063    ))
3064}
3065
/// Strips the leading indentation that marks an indented code line.
///
/// Spaces advance one column and a tab advances to the next multiple of four.
/// Once four columns are reached, the remainder of the line is returned. A
/// line that hits any other byte first, or ends before four columns, yields
/// `None`.
fn strip_indented_code_prefix(input: &str) -> Option<&str> {
    let mut width = 0usize;
    for (offset, byte) in input.bytes().enumerate() {
        width = match byte {
            b' ' => width + 1,
            b'\t' => (width / 4 + 1) * 4,
            _ => return None,
        };
        if width >= 4 {
            return Some(&input[offset + 1..]);
        }
    }
    None
}
3087
3088fn parse_table(
3089    lines: &[Line<'_>],
3090    index: usize,
3091    options: &SyntaxOptions,
3092    definitions: &[String],
3093    diagnostics: &mut Vec<Diagnostic>,
3094) -> Option<(Block, usize)> {
3095    if !options.constructs.gfm_table || index + 1 >= lines.len() {
3096        return None;
3097    }
3098    let delimiter = table_indent_line(lines[index + 1].text, options.constructs.indented_code)?;
3099    if list_marker_info(delimiter).is_some() {
3100        return None;
3101    }
3102    if !table_has_separator(lines[index].text, delimiter, options.constructs.spoiler) {
3103        return None;
3104    }
3105    let alignments = parse_table_delimiter(delimiter, options.constructs.spoiler)?;
3106    let headers = split_table_row(lines[index].text, options.constructs.spoiler);
3107    if headers.len() != alignments.len() {
3108        return None;
3109    }
3110
3111    let mut rows = Vec::new();
3112    rows.push(TableRow {
3113        meta: NodeMeta::new(Some(Span::new(lines[index].start, lines[index].end))),
3114        cells: headers
3115            .iter()
3116            .map(|cell| TableCell {
3117                meta: NodeMeta::default(),
3118                children: parse_inlines(
3119                    cell.trim(),
3120                    lines[index].start,
3121                    options,
3122                    definitions,
3123                    diagnostics,
3124                ),
3125            })
3126            .collect(),
3127    });
3128
3129    let mut cursor = index + 2;
3130    while cursor < lines.len() {
3131        let Some(row) = table_indent_line(lines[cursor].text, options.constructs.indented_code)
3132        else {
3133            break;
3134        };
3135        // Once a table is open, every non-blank line that isn't a real block
3136        // start is a body row (GFM); pipeless lines (incl. setext underlines)
3137        // become a single padded cell.
3138        if row.trim().is_empty() || table_body_line_ends_table(lines[cursor].text, options) {
3139            break;
3140        }
3141        let cells = split_table_row(row, options.constructs.spoiler);
3142        rows.push(TableRow {
3143            meta: NodeMeta::new(Some(Span::new(lines[cursor].start, lines[cursor].end))),
3144            cells: alignments
3145                .iter()
3146                .enumerate()
3147                .map(|(cell_index, _)| {
3148                    let value = cells.get(cell_index).map(String::as_str).unwrap_or("");
3149                    TableCell {
3150                        meta: NodeMeta::default(),
3151                        children: parse_inlines(
3152                            value.trim(),
3153                            lines[cursor].start,
3154                            options,
3155                            definitions,
3156                            diagnostics,
3157                        ),
3158                    }
3159                })
3160                .collect(),
3161        });
3162        cursor += 1;
3163    }
3164
3165    Some((
3166        Block::Table(Table {
3167            meta: NodeMeta::new(Some(Span::new(
3168                lines[index].start,
3169                lines[cursor - 1].end_with_eol,
3170            ))),
3171            alignments,
3172            rows,
3173        }),
3174        cursor,
3175    ))
3176}
3177
3178fn parse_setext_heading(
3179    lines: &[Line<'_>],
3180    index: usize,
3181    options: &SyntaxOptions,
3182    definitions: &[String],
3183) -> Option<(Block, usize)> {
3184    if index + 1 >= lines.len() || lines[index].text.trim().is_empty() {
3185        return None;
3186    }
3187
3188    // A setext heading is a (possibly multi-line) paragraph followed by an
3189    // underline. Scan over paragraph-continuation lines to find the underline,
3190    // stopping if a continuation line is itself a block start (which would
3191    // interrupt the paragraph before any underline could apply).
3192    let mut underline_index = index + 1;
3193    loop {
3194        // A setext underline that arrived as a LAZY block-quote continuation is
3195        // paragraph text, not an underline: `> a\n===` is `<p>a\n===</p>`, while
3196        // a MARKED `> a\n> ---` stays an H2 (its `---` is not lazy). The lazy
3197        // flag distinguishes the two; a lazy underline keeps scanning as
3198        // ordinary paragraph-continuation text.
3199        let underline_depth = if lines[underline_index].lazy {
3200            None
3201        } else {
3202            setext_underline_depth(lines[underline_index].text)
3203        };
3204        if let Some(depth) = underline_depth {
3205            let mut value = String::new();
3206            for line in &lines[index..underline_index] {
3207                // Trim leading indentation only: a fully `.trim()`ed content line
3208                // would discard the trailing spaces that form a hard line break.
3209                push_line(&mut value, trim_ascii_start(line.text));
3210            }
3211            return Some((
3212                Block::Heading(Heading {
3213                    meta: NodeMeta::new(Some(Span::new(
3214                        lines[index].start,
3215                        lines[underline_index].end,
3216                    ))),
3217                    depth,
3218                    kind: HeadingKind::Setext,
3219                    children: parse_inlines(
3220                        &value,
3221                        lines[index].start,
3222                        options,
3223                        definitions,
3224                        &mut Vec::new(),
3225                    ),
3226                }),
3227                underline_index + 1,
3228            ));
3229        }
3230
3231        // Not an underline: it must be a valid paragraph-continuation line for
3232        // the run to remain a setext heading.
3233        let line = lines[underline_index].text;
3234        if line.trim().is_empty()
3235            || table_can_start(lines, underline_index, options)
3236            || likely_block_start(line, options)
3237        {
3238            return None;
3239        }
3240        underline_index += 1;
3241        if underline_index >= lines.len() {
3242            return None;
3243        }
3244    }
3245}
3246
3247fn setext_underline_depth(input: &str) -> Option<u8> {
3248    let underline = trim_up_to_three_spaces(input)?.trim();
3249    match underline {
3250        text if !text.is_empty() && text.chars().all(|char| char == '=') => Some(1),
3251        text if !text.is_empty() && text.chars().all(|char| char == '-') => Some(2),
3252        _ => None,
3253    }
3254}
3255
3256fn parse_paragraph(
3257    lines: &[Line<'_>],
3258    index: usize,
3259    options: &SyntaxOptions,
3260    definitions: &[String],
3261    diagnostics: &mut Vec<Diagnostic>,
3262) -> (Block, usize) {
3263    let mut value = String::new();
3264    let start = lines[index].start;
3265    let mut cursor = index;
3266    while cursor < lines.len() {
3267        if lines[cursor].text.trim().is_empty() {
3268            break;
3269        }
3270        // A lazy continuation line is paragraph text by construction (it reached
3271        // this paragraph as the dedented tail of an enclosing container), so it
3272        // cannot itself start a new block — skip the block-boundary checks.
3273        if cursor > index && !lines[cursor].lazy {
3274            if table_can_start(lines, cursor, options) {
3275                break;
3276            }
3277            if likely_block_start(lines[cursor].text, options) {
3278                break;
3279            }
3280        }
3281        if !value.is_empty() {
3282            value.push('\n');
3283        }
3284        value.push_str(trim_ascii_start(lines[cursor].text));
3285        cursor += 1;
3286    }
3287
3288    let end = lines[cursor - 1].end;
3289    (
3290        Block::Paragraph(Paragraph {
3291            meta: NodeMeta::new(Some(Span::new(start, end))),
3292            children: parse_inlines(&value, start, options, definitions, diagnostics),
3293        }),
3294        cursor,
3295    )
3296}
3297
/// A `*`, `_` or `~` delimiter run recorded during the inline scan for later
/// resolution by the CommonMark delimiter-stack algorithm (`process_emphasis`).
#[derive(Clone, Copy)]
struct DelimMarker {
    /// Index of the placeholder text node in the flat node list. The text node
    /// holds the as-yet-unmatched delimiter characters; matching trims it from
    /// the appropriate side and matched characters are removed entirely.
    node_index: usize,
    /// The delimiter byte this run is made of (`b'*'`, `b'_'` or `b'~'`).
    marker: u8,
    /// Remaining unmatched delimiter characters in this run.
    length: usize,
    /// Whether the run may open a span, as decided when the run was recorded.
    can_open: bool,
    /// Whether the run may close a span, as decided when the run was recorded.
    can_close: bool,
    /// Absolute byte offset of the run's first remaining delimiter character.
    span_start: usize,
    /// `true` once this run is consumed (fully matched) or demoted to plain text.
    inactive: bool,
}
3316
3317/// Records a `*`/`_`/`~` delimiter run as a literal text node plus a stack
3318/// entry.
3319///
3320/// Flanking is computed on the whole run (CommonMark treats left/right-flanking
3321/// as a property of the run, not of an individual delimiter), so the same
3322/// `can_open`/`can_close` helpers that the older ad-hoc scanner used are reused
3323/// here unchanged — including the `_` intraword punctuation rules.
3324///
3325/// `strikethrough` enables the GFM cross-marker bonus: when strikethrough is an
3326/// active construct, a `*`/`_` run immediately adjacent to a `~` counts as
3327/// openable/closeable even though `~` is a punctuation character (this is what
3328/// makes `a*~b~*c` emphasize). The bonus is never granted to a `~` run itself —
3329/// tilde gets plain CommonMark flanking.
3330fn record_emphasis_delimiter(
3331    nodes: &mut Vec<Inline>,
3332    delimiters: &mut Vec<DelimMarker>,
3333    input: &str,
3334    index: usize,
3335    base_offset: usize,
3336    marker: u8,
3337    strikethrough: bool,
3338) {
3339    let length = delimiter_byte_run_len(input, index, marker);
3340    let (mut can_open, mut can_close) = if marker == b'_' {
3341        (
3342            can_open_underscore(input, index, length),
3343            can_close_underscore(input, index, length),
3344        )
3345    } else {
3346        (
3347            can_open_delimited(input, index, length),
3348            can_close_delimited(input, index, length),
3349        )
3350    };
3351
3352    // GFM: a `*`/`_` run touching a `~` strikethrough marker may open/close even
3353    // when ordinary flanking refuses it (the `~` would otherwise be a blocking
3354    // punctuation neighbour). Tilde itself never receives this bonus.
3355    if strikethrough && marker != b'~' {
3356        let before = input[..index].chars().next_back();
3357        let after = input[index + length..].chars().next();
3358        if after == Some('~') {
3359            can_open = true;
3360        }
3361        if before == Some('~') {
3362            can_close = true;
3363        }
3364    }
3365
3366    let value = String::from(marker as char).repeat(length);
3367
3368    let node_index = nodes.len();
3369    nodes.push(Inline::Text(Text {
3370        meta: NodeMeta::new(Some(Span::new(
3371            base_offset + index,
3372            base_offset + index + length,
3373        ))),
3374        value,
3375    }));
3376
3377    delimiters.push(DelimMarker {
3378        node_index,
3379        marker,
3380        length,
3381        can_open,
3382        can_close,
3383        span_start: base_offset + index,
3384        inactive: false,
3385    });
3386}
3387
3388/// Resolves recorded `*`/`_` delimiter runs into `Emphasis`/`Strong` nodes using
3389/// the CommonMark delimiter-stack algorithm, leaving unmatched runs as text.
3390fn process_emphasis(mut nodes: Vec<Inline>, mut delimiters: Vec<DelimMarker>) -> Vec<Inline> {
3391    if delimiters.is_empty() {
3392        return nodes;
3393    }
3394
3395    // `openers_bottom` records, per (marker, opener-can-also-close, length % 3),
3396    // the lowest opener index a closer is allowed to reach. Closers below this
3397    // bound for their key have already been proven to have no compatible opener.
3398    // Three markers (`*`, `_`, `~`) × both-flag × length%3.
3399    let mut openers_bottom: [Option<usize>; 18] = [None; 18];
3400    let mut closer_idx = 0;
3401
3402    while closer_idx < delimiters.len() {
3403        let closer = delimiters[closer_idx];
3404        if closer.inactive || !closer.can_close {
3405            closer_idx += 1;
3406            continue;
3407        }
3408
3409        let key = openers_bottom_key(&closer);
3410        let bottom = openers_bottom[key];
3411
3412        // Walk back to the nearest compatible opener above the recorded bound.
3413        let mut opener_idx = None;
3414        let mut search = closer_idx;
3415        while search > 0 {
3416            search -= 1;
3417            if let Some(bottom) = bottom {
3418                if search < bottom {
3419                    break;
3420                }
3421            }
3422            let candidate = delimiters[search];
3423            if candidate.inactive || candidate.marker != closer.marker || !candidate.can_open {
3424                continue;
3425            }
3426            if emphasis_delimiters_match(&candidate, &closer) {
3427                opener_idx = Some(search);
3428                break;
3429            }
3430        }
3431
3432        let Some(opener_idx) = opener_idx else {
3433            // No opener found: remember how far we searched so future closers of
3434            // the same key skip the same dead range. A closer that cannot also
3435            // open is removed so it is never revisited.
3436            openers_bottom[key] = Some(closer_idx);
3437            if !closer.can_open {
3438                delimiters[closer_idx].inactive = true;
3439            }
3440            closer_idx += 1;
3441            continue;
3442        };
3443
3444        let (used, wrap) = if closer.marker == b'~' {
3445            // Strikethrough consumes the whole (equal-length) run on each side at
3446            // once; the marker width selects the `Delete` flavour.
3447            let length = delimiters[closer_idx].length;
3448            let marker = if length >= 2 {
3449                DeleteMarker::DoubleTilde
3450            } else {
3451                DeleteMarker::SingleTilde
3452            };
3453            (length, EmphasisWrap::Delete(marker))
3454        } else {
3455            let strong = delimiters[opener_idx].length >= 2 && delimiters[closer_idx].length >= 2;
3456            let used = if strong { 2 } else { 1 };
3457            let wrap = if strong {
3458                EmphasisWrap::Strong
3459            } else {
3460                EmphasisWrap::Emphasis
3461            };
3462            (used, wrap)
3463        };
3464
3465        apply_emphasis(
3466            &mut nodes,
3467            &mut delimiters,
3468            opener_idx,
3469            closer_idx,
3470            used,
3471            wrap,
3472        );
3473
3474        // Drop delimiters strictly between the opener and closer: they could not
3475        // match outward across this newly closed span.
3476        let mut inner = opener_idx + 1;
3477        while inner < closer_idx {
3478            delimiters[inner].inactive = true;
3479            inner += 1;
3480        }
3481
3482        if delimiters[opener_idx].length == 0 {
3483            delimiters[opener_idx].inactive = true;
3484        }
3485        if delimiters[closer_idx].length == 0 {
3486            delimiters[closer_idx].inactive = true;
3487            closer_idx += 1;
3488        }
3489        // When the closer still has delimiters left it stays the active closer so
3490        // the leftover can match an earlier opener (e.g. `***foo*` keeps `**`).
3491    }
3492
3493    // Adjacent text nodes can appear where unmatched delimiter runs ended up
3494    // beside literal text (`**foo*bar*` -> `**foo` + emphasis). CommonMark
3495    // coalesces them as the final step; do the same for the spans we created.
3496    merge_adjacent_text(&mut nodes);
3497    nodes
3498}
3499
3500/// Merges consecutive `Text` nodes in a list, recursing into the `Emphasis`/
3501/// `Strong` nodes produced at this level. Other containers were already
3502/// finalized by their own `parse_inlines` pass and are left untouched.
3503fn merge_adjacent_text(nodes: &mut Vec<Inline>) {
3504    let mut write = 0;
3505    for read in 0..nodes.len() {
3506        if read != write {
3507            nodes.swap(read, write);
3508        }
3509        if write > 0 {
3510            let (head, tail) = nodes.split_at_mut(write);
3511            if let (Inline::Text(previous), Inline::Text(current)) =
3512                (&mut head[write - 1], &tail[0])
3513            {
3514                previous.value.push_str(&current.value);
3515                if let (Some(previous_span), Some(current_span)) =
3516                    (previous.meta.span.as_mut(), current.meta.span)
3517                {
3518                    previous_span.end = current_span.end;
3519                }
3520                continue;
3521            }
3522        }
3523        write += 1;
3524    }
3525    nodes.truncate(write);
3526
3527    for node in nodes.iter_mut() {
3528        match node {
3529            Inline::Emphasis(emphasis) => merge_adjacent_text(&mut emphasis.children),
3530            Inline::Strong(strong) => merge_adjacent_text(&mut strong.children),
3531            Inline::Delete(delete) => merge_adjacent_text(&mut delete.children),
3532            _ => {}
3533        }
3534    }
3535}
3536
3537/// Index into `openers_bottom` for a closer's (marker, both-flags, length%3) key.
3538fn openers_bottom_key(closer: &DelimMarker) -> usize {
3539    let marker = match closer.marker {
3540        b'_' => 1,
3541        b'~' => 2,
3542        _ => 0,
3543    };
3544    let both = usize::from(closer.can_open && closer.can_close);
3545    let modulo = closer.length % 3;
3546    ((marker * 2) + both) * 3 + modulo
3547}
3548
3549/// CommonMark opener/closer compatibility, including the rule of three.
3550fn emphasis_delimiters_match(opener: &DelimMarker, closer: &DelimMarker) -> bool {
3551    // GFM strikethrough: opener and closer runs must be the same length (a `~`
3552    // never pairs with `~~`). The rule of three does not apply to `~`.
3553    if opener.marker == b'~' {
3554        return opener.length == closer.length;
3555    }
3556
3557    // Rule of three: if either delimiter can both open and close, the sum of the
3558    // two run lengths must not be a multiple of three, unless both lengths are
3559    // themselves multiples of three.
3560    let opener_both = opener.can_open && opener.can_close;
3561    let closer_both = closer.can_open && closer.can_close;
3562    if opener_both || closer_both {
3563        let sum = opener.length + closer.length;
3564        if sum % 3 == 0 && !(opener.length % 3 == 0 && closer.length % 3 == 0) {
3565            return false;
3566        }
3567    }
3568    true
3569}
3570
/// The node a matched delimiter pair collapses into.
#[derive(Clone, Copy)]
enum EmphasisWrap {
    /// The pair becomes an `Inline::Emphasis` node.
    Emphasis,
    /// The pair becomes an `Inline::Strong` node.
    Strong,
    /// The pair becomes an `Inline::Delete` node; the payload records which
    /// tilde flavour (`DeleteMarker`) delimited it.
    Delete(DeleteMarker),
}
3578
3579/// Wraps the nodes between two delimiter runs into an `Emphasis`/`Strong`/
3580/// `Delete` node, consuming `used` characters from each side and keeping every
3581/// other delimiter's `node_index` consistent with the rewritten node list.
3582fn apply_emphasis(
3583    nodes: &mut Vec<Inline>,
3584    delimiters: &mut [DelimMarker],
3585    opener_idx: usize,
3586    closer_idx: usize,
3587    used: usize,
3588    wrap: EmphasisWrap,
3589) {
3590    let opener_node = delimiters[opener_idx].node_index;
3591    let closer_node = delimiters[closer_idx].node_index;
3592
3593    // Trim the consumed characters from the opener's text node (right side) and
3594    // the closer's text node (left side), updating their recorded lengths/spans.
3595    trim_delimiter_text_tail(&mut nodes[opener_node], used);
3596    delimiters[opener_idx].length -= used;
3597    delimiters[opener_idx].span_start += used;
3598
3599    trim_delimiter_text_head(&mut nodes[closer_node], used);
3600    delimiters[closer_idx].length -= used;
3601
3602    // Span covers the consumed opener delimiters through the consumed closer
3603    // delimiters. The exact value is informational; structure is what matters.
3604    let span_start = delimiters[opener_idx].span_start - used;
3605    let span_end = delimiters[closer_idx].span_start + delimiters[closer_idx].length + used;
3606
3607    // The wrapped children are the nodes strictly between the opener and closer
3608    // text nodes.
3609    let children_start = opener_node + 1;
3610    let children_end = closer_node; // exclusive
3611    let children: Vec<Inline> = nodes.drain(children_start..children_end).collect();
3612    let removed = children.len();
3613
3614    let meta = NodeMeta::new(Some(Span::new(span_start, span_end)));
3615    let wrapped = match wrap {
3616        EmphasisWrap::Strong => Inline::Strong(Strong { meta, children }),
3617        EmphasisWrap::Emphasis => Inline::Emphasis(Emphasis { meta, children }),
3618        EmphasisWrap::Delete(marker) => Inline::Delete(Delete {
3619            meta,
3620            marker,
3621            children,
3622        }),
3623    };
3624    nodes.insert(children_start, wrapped);
3625
3626    // Indices at or past the (old) closer node shift by `1 - removed`: the drain
3627    // removed `removed` nodes then the insert added one. Apply this using the
3628    // original `children_end` threshold before any further mutation.
3629    reindex_delimiters(delimiters, children_end, 1 - removed as isize);
3630
3631    // Drop any placeholder text node that has been fully consumed so leftover
3632    // delimiters never survive as literal text. Remove the closer first because
3633    // it sits at the higher index and removal shifts everything after it.
3634    if delimiters[closer_idx].length == 0 {
3635        let pos = delimiters[closer_idx].node_index;
3636        nodes.remove(pos);
3637        reindex_delimiters(delimiters, pos, -1);
3638    }
3639    if delimiters[opener_idx].length == 0 {
3640        let pos = delimiters[opener_idx].node_index;
3641        nodes.remove(pos);
3642        reindex_delimiters(delimiters, pos, -1);
3643    }
3644}
3645
3646/// Adjusts `node_index` for every delimiter at or after `from` by `delta`.
3647fn reindex_delimiters(delimiters: &mut [DelimMarker], from: usize, delta: isize) {
3648    if delta == 0 {
3649        return;
3650    }
3651    for delimiter in delimiters.iter_mut() {
3652        if delimiter.node_index >= from {
3653            delimiter.node_index = (delimiter.node_index as isize + delta) as usize;
3654        }
3655    }
3656}
3657
3658/// Removes `count` trailing delimiter characters from a placeholder text node.
3659fn trim_delimiter_text_tail(node: &mut Inline, count: usize) {
3660    if let Inline::Text(text) = node {
3661        let new_len = text.value.len().saturating_sub(count);
3662        text.value.truncate(new_len);
3663        if let Some(span) = text.meta.span.as_mut() {
3664            span.end = span.end.saturating_sub(count);
3665        }
3666    }
3667}
3668
3669/// Removes `count` leading delimiter characters from a placeholder text node.
3670fn trim_delimiter_text_head(node: &mut Inline, count: usize) {
3671    if let Inline::Text(text) = node {
3672        let count = count.min(text.value.len());
3673        text.value.drain(..count);
3674        if let Some(span) = text.meta.span.as_mut() {
3675            span.start += count;
3676        }
3677    }
3678}
3679
3680fn parse_inlines(
3681    input: &str,
3682    base_offset: usize,
3683    options: &SyntaxOptions,
3684    definitions: &[String],
3685    diagnostics: &mut Vec<Diagnostic>,
3686) -> Vec<Inline> {
3687    parse_inlines_with_context(
3688        input,
3689        base_offset,
3690        options,
3691        definitions,
3692        diagnostics,
3693        InlineContext::default(),
3694    )
3695}
3696
/// Per-call settings threaded through the inline parser and handed on
/// unchanged to the nested parses it starts for container constructs.
#[derive(Clone, Copy)]
struct InlineContext {
    /// Whether link-like constructs may be produced. Among other things the
    /// literal-autolink attempt made before a `_` run is recorded only runs
    /// when this is `true`.
    allow_links: bool,
}

impl Default for InlineContext {
    /// The default context allows links.
    fn default() -> InlineContext {
        InlineContext { allow_links: true }
    }
}
3707
3708fn parse_inlines_with_context(
3709    input: &str,
3710    base_offset: usize,
3711    options: &SyntaxOptions,
3712    definitions: &[String],
3713    diagnostics: &mut Vec<Diagnostic>,
3714    context: InlineContext,
3715) -> Vec<Inline> {
3716    let bytes = input.as_bytes();
3717    let mut nodes = Vec::new();
3718    let mut text_start = 0;
3719    let mut text = String::new();
3720    let mut index = 0;
3721    // Core `*`/`_` emphasis is resolved with a CommonMark delimiter stack after
3722    // the scan completes. During the scan we emit each candidate delimiter run as
3723    // a literal text node and record its position here so `process_emphasis` can
3724    // rewrite the flat node list into Emphasis/Strong (or leave it as text).
3725    let mut delimiters: Vec<DelimMarker> = Vec::new();
3726
3727    while index < bytes.len() {
3728        if bytes[index] == b'\\' {
3729            if let Some((next_index, char)) = next_char(input, index + 1) {
3730                if char.is_ascii_punctuation() {
3731                    if options.parse.preserve_character_escapes {
3732                        flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3733                        nodes.push(Inline::Escape(Escape {
3734                            meta: NodeMeta::new(Some(Span::new(
3735                                base_offset + index,
3736                                base_offset + next_index,
3737                            ))),
3738                            value: char,
3739                        }));
3740                        index = next_index;
3741                        text_start = index;
3742                        continue;
3743                    }
3744                    if text.is_empty() {
3745                        text_start = base_offset + index;
3746                    }
3747                    if gfm_link_label_preserves_url_dot_escape(&text, char, options, context) {
3748                        text.push('\\');
3749                    }
3750                    text.push(char);
3751                    index = next_index;
3752                    continue;
3753                }
3754            }
3755        }
3756
3757        if bytes[index] == b'&' {
3758            if let Some((end, value)) = parse_character_reference(input, index) {
3759                if options.parse.preserve_character_references {
3760                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3761                    nodes.push(Inline::CharacterReference(CharacterReference {
3762                        meta: NodeMeta::new(Some(Span::new(
3763                            base_offset + index,
3764                            base_offset + end,
3765                        ))),
3766                        reference: input[index..end].into(),
3767                        value,
3768                    }));
3769                    index = end;
3770                    text_start = index;
3771                    continue;
3772                }
3773                if text.is_empty() {
3774                    text_start = base_offset + index;
3775                }
3776                text.push_str(&value);
3777                index = end;
3778                continue;
3779            }
3780        }
3781
3782        if bytes[index] == b'\n' {
3783            if text.ends_with('\\') {
3784                text.pop();
3785                flush_text(
3786                    &mut nodes,
3787                    &mut text,
3788                    text_start,
3789                    base_offset + index.saturating_sub(1),
3790                );
3791                nodes.push(Inline::LineBreak(LineBreak {
3792                    meta: NodeMeta::new(Some(Span::new(
3793                        base_offset + index.saturating_sub(1),
3794                        base_offset + index + 1,
3795                    ))),
3796                    kind: LineBreakKind::Backslash,
3797                }));
3798                index += 1;
3799                text_start = index;
3800                continue;
3801            }
3802            let trailing_spaces = trailing_space_count(&text);
3803            if is_hard_break_suffix(&text, trailing_spaces) {
3804                text.truncate(text.len() - trailing_spaces);
3805                flush_text(
3806                    &mut nodes,
3807                    &mut text,
3808                    text_start,
3809                    base_offset + index.saturating_sub(trailing_spaces),
3810                );
3811                nodes.push(Inline::LineBreak(LineBreak {
3812                    meta: NodeMeta::new(Some(Span::new(
3813                        base_offset + index.saturating_sub(trailing_spaces),
3814                        base_offset + index + 1,
3815                    ))),
3816                    kind: LineBreakKind::Spaces,
3817                }));
3818                index += 1;
3819                text_start = index;
3820                continue;
3821            }
3822            if trailing_spaces > 0 {
3823                text.truncate(text.len() - trailing_spaces);
3824            }
3825            flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3826            nodes.push(Inline::SoftBreak(SoftBreak {
3827                meta: NodeMeta::new(Some(Span::new(
3828                    base_offset + index,
3829                    base_offset + index + 1,
3830                ))),
3831            }));
3832            index += 1;
3833            text_start = index;
3834            continue;
3835        }
3836
3837        if bytes[index] == b'`' {
3838            if let Some((end, code_span)) = parse_code_span(input, index) {
3839                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3840                nodes.push(Inline::Code(CodeInline {
3841                    meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
3842                    value: code_span.value,
3843                    raw: code_span.raw,
3844                    fence_length: code_span.fence_length,
3845                }));
3846                index = end;
3847                text_start = index;
3848                continue;
3849            } else {
3850                // No matching-length close for this opening backtick run:
3851                // CommonMark renders the whole run as literal text. Consume the
3852                // entire run here so the loop does not advance one byte and retry
3853                // a shorter sub-run that could spuriously match a shorter close
3854                // (```foo`` stayed a phantom 2-backtick code span).
3855                let run = bytes[index..]
3856                    .iter()
3857                    .take_while(|byte| **byte == b'`')
3858                    .count();
3859                if text.is_empty() {
3860                    text_start = base_offset + index;
3861                }
3862                for _ in 0..run {
3863                    text.push('`');
3864                }
3865                index += run;
3866                continue;
3867            }
3868        }
3869
3870        if options.constructs.spoiler
3871            && bytes.get(index) == Some(&b'|')
3872            && bytes.get(index + 1) == Some(&b'|')
3873            && bytes.get(index + 2) != Some(&b'|')
3874        {
3875            if let Some(end) = find_spoiler_close(input, index + 2) {
3876                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3877                let inner = &input[index + 2..end];
3878                nodes.push(Inline::Spoiler(Spoiler {
3879                    meta: NodeMeta::new(Some(Span::new(
3880                        base_offset + index,
3881                        base_offset + end + 2,
3882                    ))),
3883                    children: parse_inlines_with_context(
3884                        inner,
3885                        base_offset + index + 2,
3886                        options,
3887                        definitions,
3888                        diagnostics,
3889                        context,
3890                    ),
3891                }));
3892                index = end + 2;
3893                text_start = index;
3894                continue;
3895            }
3896        }
3897
3898        if bytes[index] == b'*' && delimiter_byte_run_start(input, index, b'*') == index {
3899            let run_len = delimiter_byte_run_len(input, index, b'*');
3900            flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3901            record_emphasis_delimiter(
3902                &mut nodes,
3903                &mut delimiters,
3904                input,
3905                index,
3906                base_offset,
3907                b'*',
3908                options.constructs.gfm_strikethrough,
3909            );
3910            index += run_len;
3911            text_start = index;
3912            continue;
3913        }
3914
3915        if options.constructs.underline
3916            && bytes.get(index) == Some(&b'_')
3917            && bytes.get(index + 1) == Some(&b'_')
3918            && bytes.get(index + 2) == Some(&b'_')
3919            && can_open_underscore(input, index, 1)
3920        {
3921            if let Some(end) = find_closing_delimiter(input, index + 3, "___", true) {
3922                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3923                let inner = &input[index + 3..end];
3924                let underline = Inline::Underline(Underline {
3925                    meta: NodeMeta::new(Some(Span::new(
3926                        base_offset + index + 1,
3927                        base_offset + end + 2,
3928                    ))),
3929                    children: parse_inlines_with_context(
3930                        inner,
3931                        base_offset + index + 3,
3932                        options,
3933                        definitions,
3934                        diagnostics,
3935                        context,
3936                    ),
3937                });
3938                nodes.push(Inline::Emphasis(Emphasis {
3939                    meta: NodeMeta::new(Some(Span::new(
3940                        base_offset + index,
3941                        base_offset + end + 3,
3942                    ))),
3943                    children: vec![underline],
3944                }));
3945                index = end + 3;
3946                text_start = index;
3947                continue;
3948            }
3949        }
3950
3951        if options.constructs.underline
3952            && bytes.get(index) == Some(&b'_')
3953            && bytes.get(index + 1) == Some(&b'_')
3954            && can_open_underscore(input, index, 2)
3955        {
3956            if let Some(end) = find_closing_delimiter(input, index + 2, "__", true) {
3957                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3958                let inner = &input[index + 2..end];
3959                nodes.push(Inline::Underline(Underline {
3960                    meta: NodeMeta::new(Some(Span::new(
3961                        base_offset + index,
3962                        base_offset + end + 2,
3963                    ))),
3964                    children: parse_inlines_with_context(
3965                        inner,
3966                        base_offset + index + 2,
3967                        options,
3968                        definitions,
3969                        diagnostics,
3970                        context,
3971                    ),
3972                }));
3973                index = end + 2;
3974                text_start = index;
3975                continue;
3976            }
3977        }
3978
3979        // Core `_` emphasis/strong is resolved by the delimiter stack, just like
3980        // `*`. The `___`/`__` underline-extension branches above run first and
3981        // `continue` when they consume the run, so reaching this point means the
3982        // run is plain emphasis material (underline disabled, or no underline
3983        // close was found).
3984        if bytes[index] == b'_' && delimiter_byte_run_start(input, index, b'_') == index {
3985            // A leading `_` can begin a GFM email local part (`_a@b.c`); try the
3986            // literal autolink before recording the `_` as an emphasis
3987            // delimiter, otherwise the `_` would be consumed and the email would
3988            // wrongly start one char later (where its left boundary fails).
3989            if (options.constructs.gfm_autolink_literal || options.constructs.relaxed_autolinks)
3990                && context.allow_links
3991            {
3992                if let Some((end, destination)) = parse_literal_autolink(
3993                    input,
3994                    index,
3995                    options.constructs.gfm_autolink_literal,
3996                    options.constructs.relaxed_autolinks,
3997                ) {
3998                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3999                    nodes.push(Inline::Autolink(Autolink {
4000                        meta: NodeMeta::new(Some(Span::new(
4001                            base_offset + index,
4002                            base_offset + end,
4003                        ))),
4004                        destination,
4005                        kind: AutolinkKind::GfmLiteral {
4006                            original: input[index..end].into(),
4007                        },
4008                    }));
4009                    index = end;
4010                    text_start = index;
4011                    continue;
4012                }
4013            }
4014            let run_len = delimiter_byte_run_len(input, index, b'_');
4015            flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4016            record_emphasis_delimiter(
4017                &mut nodes,
4018                &mut delimiters,
4019                input,
4020                index,
4021                base_offset,
4022                b'_',
4023                options.constructs.gfm_strikethrough,
4024            );
4025            index += run_len;
4026            text_start = index;
4027            continue;
4028        }
4029
4030        if options.constructs.insert
4031            && bytes.get(index) == Some(&b'+')
4032            && bytes.get(index + 1) == Some(&b'+')
4033            && bytes.get(index + 2) != Some(&b'+')
4034            && can_open_delimited(input, index, 2)
4035        {
4036            if let Some(end) = find_closing_delimiter(input, index + 2, "++", false) {
4037                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4038                let inner = &input[index + 2..end];
4039                nodes.push(Inline::Insert(Insert {
4040                    meta: NodeMeta::new(Some(Span::new(
4041                        base_offset + index,
4042                        base_offset + end + 2,
4043                    ))),
4044                    children: parse_inlines_with_context(
4045                        inner,
4046                        base_offset + index + 2,
4047                        options,
4048                        definitions,
4049                        diagnostics,
4050                        context,
4051                    ),
4052                }));
4053                index = end + 2;
4054                text_start = index;
4055                continue;
4056            }
4057        }
4058
4059        if options.constructs.highlight
4060            && bytes.get(index) == Some(&b'=')
4061            && bytes.get(index + 1) == Some(&b'=')
4062            && bytes.get(index + 2) != Some(&b'=')
4063            && can_open_delimited(input, index, 2)
4064        {
4065            if let Some(end) = find_closing_delimiter(input, index + 2, "==", false) {
4066                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4067                let inner = &input[index + 2..end];
4068                nodes.push(Inline::Mark(Mark {
4069                    meta: NodeMeta::new(Some(Span::new(
4070                        base_offset + index,
4071                        base_offset + end + 2,
4072                    ))),
4073                    children: parse_inlines_with_context(
4074                        inner,
4075                        base_offset + index + 2,
4076                        options,
4077                        definitions,
4078                        diagnostics,
4079                        context,
4080                    ),
4081                }));
4082                index = end + 2;
4083                text_start = index;
4084                continue;
4085            }
4086        }
4087
4088        if options.constructs.subscript
4089            && starts_exact_byte_run(input, index, b'~', 1)
4090            && !single_tilde_delete_takes_precedence(options, input, index)
4091        {
4092            if let Some(end) = find_simple_inline_close(input, index + 1, b'~') {
4093                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4094                let inner = &input[index + 1..end];
4095                nodes.push(Inline::Subscript(Subscript {
4096                    meta: NodeMeta::new(Some(Span::new(
4097                        base_offset + index,
4098                        base_offset + end + 1,
4099                    ))),
4100                    children: parse_inlines_with_context(
4101                        inner,
4102                        base_offset + index + 1,
4103                        options,
4104                        definitions,
4105                        diagnostics,
4106                        context,
4107                    ),
4108                }));
4109                index = end + 1;
4110                text_start = index;
4111                continue;
4112            }
4113        }
4114
4115        if options.constructs.inline_footnote
4116            && options.constructs.footnote_reference
4117            && bytes.get(index) == Some(&b'^')
4118            && bytes.get(index + 1) == Some(&b'[')
4119        {
4120            if let Some(close) = find_inline_footnote_end(input, index + 2) {
4121                let inner = &input[index + 2..close];
4122                if !inner.trim().is_empty() {
4123                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4124                    nodes.push(Inline::InlineFootnote(InlineFootnote {
4125                        meta: NodeMeta::new(Some(Span::new(
4126                            base_offset + index,
4127                            base_offset + close + 1,
4128                        ))),
4129                        children: parse_inlines_with_context(
4130                            inner,
4131                            base_offset + index + 2,
4132                            options,
4133                            definitions,
4134                            diagnostics,
4135                            context,
4136                        ),
4137                    }));
4138                    index = close + 1;
4139                    text_start = index;
4140                    continue;
4141                }
4142            }
4143        }
4144
4145        if options.constructs.superscript
4146            && bytes.get(index) == Some(&b'^')
4147            && !(options.constructs.inline_footnote && bytes.get(index + 1) == Some(&b'['))
4148        {
4149            if let Some(end) = find_simple_inline_close(input, index + 1, b'^') {
4150                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4151                let inner = &input[index + 1..end];
4152                nodes.push(Inline::Superscript(Superscript {
4153                    meta: NodeMeta::new(Some(Span::new(
4154                        base_offset + index,
4155                        base_offset + end + 1,
4156                    ))),
4157                    children: parse_inlines_with_context(
4158                        inner,
4159                        base_offset + index + 1,
4160                        options,
4161                        definitions,
4162                        diagnostics,
4163                        context,
4164                    ),
4165                }));
4166                index = end + 1;
4167                text_start = index;
4168                continue;
4169            }
4170        }
4171
4172        // GFM strikethrough joins the shared CommonMark delimiter stack: a `~`
4173        // run is recorded as a candidate run (just like `*`/`_`) and paired into
4174        // `Delete` by `process_emphasis`, rather than scanned greedily here. Only
4175        // runs of length 1 (single-tilde mode) or 2 can ever form strikethrough;
4176        // runs of 3+ never do, so they fall through to literal text. The
4177        // subscript branch above already claimed single `~` runs it owns.
4178        if options.constructs.gfm_strikethrough
4179            && bytes[index] == b'~'
4180            && delimiter_byte_run_start(input, index, b'~') == index
4181        {
4182            let run_len = delimiter_byte_run_len(input, index, b'~');
4183            let recordable =
4184                run_len == 2 || (run_len == 1 && options.parse.single_tilde_strikethrough);
4185            if recordable {
4186                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4187                record_emphasis_delimiter(
4188                    &mut nodes,
4189                    &mut delimiters,
4190                    input,
4191                    index,
4192                    base_offset,
4193                    b'~',
4194                    true,
4195                );
4196                index += run_len;
4197                text_start = index;
4198                continue;
4199            }
4200        }
4201
4202        if bytes[index] == b'!' && index + 1 < bytes.len() && bytes[index + 1] == b'[' {
4203            if let Some((end, image)) =
4204                parse_image(input, index, base_offset, options, definitions, diagnostics)
4205            {
4206                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4207                nodes.push(image);
4208                index = end;
4209                text_start = index;
4210                continue;
4211            }
4212        }
4213
4214        if bytes[index] == b'[' {
4215            if let Some((end, wikilink)) = parse_wikilink(input, index, base_offset, options) {
4216                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4217                nodes.push(wikilink);
4218                index = end;
4219                text_start = index;
4220                continue;
4221            }
4222            if let Some((end, link)) = parse_link(
4223                input,
4224                index,
4225                base_offset,
4226                options,
4227                definitions,
4228                diagnostics,
4229                context,
4230            ) {
4231                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4232                nodes.push(link);
4233                index = end;
4234                text_start = index;
4235                continue;
4236            }
4237            if options.constructs.footnote_reference
4238                && bytes.get(index) == Some(&b'[')
4239                && bytes.get(index + 1) == Some(&b'^')
4240            {
4241                if let Some(close) = find_footnote_reference_label_end(input, index + 2) {
4242                    let label = &input[index + 2..close];
4243                    if is_footnote_label(label) {
4244                        flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4245                        nodes.push(Inline::FootnoteReference(FootnoteReference {
4246                            meta: NodeMeta::new(Some(Span::new(
4247                                base_offset + index,
4248                                base_offset + close + 1,
4249                            ))),
4250                            label: label.into(),
4251                            identifier: normalize_label(label),
4252                        }));
4253                        index = close + 1;
4254                        text_start = index;
4255                        continue;
4256                    }
4257                }
4258            }
4259        }
4260
4261        if bytes[index] == b'$' && options.constructs.math_inline {
4262            if let Some((end, value, kind)) = parse_math_inline(input, index) {
4263                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4264                nodes.push(Inline::Math(MathInline {
4265                    meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
4266                    value,
4267                    kind,
4268                }));
4269                index = end;
4270                text_start = index;
4271                continue;
4272            }
4273            // A dollar run that opens but finds no exact-length close is emitted
4274            // as literal text in one piece (like a code-span). Skipping the
4275            // whole run prevents re-opening with a shorter marker inside it, so
4276            // `$$$foo$$` stays literal rather than matching `$$foo$$`. A lone
4277            // `$` before a backtick (the code-math form) is a run of 1, so this
4278            // still advances correctly when that form fails.
4279            let run = bytes[index..]
4280                .iter()
4281                .take_while(|byte| **byte == b'$')
4282                .count();
4283            if run > 1 {
4284                if text.is_empty() {
4285                    text_start = base_offset + index;
4286                }
4287                text.push_str(&input[index..index + run]);
4288                index += run;
4289                continue;
4290            }
4291        }
4292
4293        // GFM bare autolinks must not fire inside an existing link's text
4294        // (no links in links) — `context.allow_links` is false in label scans.
4295        if (options.constructs.gfm_autolink_literal || options.constructs.relaxed_autolinks)
4296            && context.allow_links
4297        {
4298            if let Some((end, destination)) = parse_literal_autolink(
4299                input,
4300                index,
4301                options.constructs.gfm_autolink_literal,
4302                options.constructs.relaxed_autolinks,
4303            ) {
4304                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4305                nodes.push(Inline::Autolink(Autolink {
4306                    meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
4307                    destination,
4308                    kind: AutolinkKind::GfmLiteral {
4309                        original: input[index..end].into(),
4310                    },
4311                }));
4312                index = end;
4313                text_start = index;
4314                continue;
4315            }
4316        }
4317
4318        if bytes[index] == b'<' {
4319            if let Some(end) = parse_autolink_end(input, index) {
4320                let raw = &input[index..end];
4321                if is_autolink(raw) {
4322                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4323                    if context.allow_links {
4324                        nodes.push(Inline::Autolink(Autolink {
4325                            meta: NodeMeta::new(Some(Span::new(
4326                                base_offset + index,
4327                                base_offset + end,
4328                            ))),
4329                            destination: raw[1..raw.len() - 1].into(),
4330                            kind: AutolinkKind::Angle,
4331                        }));
4332                    } else {
4333                        nodes.push(Inline::Text(Text {
4334                            meta: NodeMeta::new(Some(Span::new(
4335                                base_offset + index,
4336                                base_offset + end,
4337                            ))),
4338                            value: raw[1..raw.len() - 1].into(),
4339                        }));
4340                    }
4341                    index = end;
4342                    text_start = index;
4343                    continue;
4344                }
4345            }
4346            if options.constructs.mdx_jsx_inline {
4347                if let Some((end, raw)) = parse_mdx_jsx_inline(input, index) {
4348                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4349                    nodes.push(Inline::MdxJsx(MdxJsxInline {
4350                        meta: NodeMeta::new(Some(Span::new(
4351                            base_offset + index,
4352                            base_offset + end,
4353                        ))),
4354                        value: raw,
4355                    }));
4356                    index = end;
4357                    text_start = index;
4358                    continue;
4359                }
4360            }
4361            if let Some((end, raw)) = parse_html_inline(input, index) {
4362                if options.constructs.html_inline {
4363                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4364                    nodes.push(Inline::Html(HtmlInline {
4365                        meta: NodeMeta::new(Some(Span::new(
4366                            base_offset + index,
4367                            base_offset + end,
4368                        ))),
4369                        value: raw,
4370                    }));
4371                    index = end;
4372                    text_start = index;
4373                    continue;
4374                }
4375            }
4376        }
4377
4378        if bytes[index] == b'{' && options.constructs.mdx_expression_inline {
4379            if let Some(end) = find_mdx_expression_inline_close(input, index) {
4380                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4381                nodes.push(Inline::MdxExpression(MdxExpressionInline {
4382                    meta: NodeMeta::new(Some(Span::new(
4383                        base_offset + index,
4384                        base_offset + end + 1,
4385                    ))),
4386                    value: input[index + 1..end].into(),
4387                }));
4388                index = end + 1;
4389                text_start = index;
4390                continue;
4391            } else {
4392                diagnostics.push(Diagnostic::new(
4393                    DiagnosticSeverity::Error,
4394                    DiagnosticCode::InvalidMdx,
4395                    Span::new(base_offset + index, base_offset + input.len()),
4396                    "MDX expression is missing a closing brace",
4397                ));
4398            }
4399        }
4400
4401        if bytes[index] == b':' && options.constructs.shortcode {
4402            if let Some((end, name)) = parse_shortcode(input, index) {
4403                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4404                nodes.push(Inline::Shortcode(Shortcode {
4405                    meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
4406                    name,
4407                }));
4408                index = end;
4409                text_start = index;
4410                continue;
4411            }
4412        }
4413
4414        if bytes[index] == b':' && options.constructs.directive_text {
4415            if let Some((end, directive)) =
4416                parse_text_directive(input, index, base_offset, options, definitions, diagnostics)
4417            {
4418                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4419                nodes.push(directive);
4420                index = end;
4421                text_start = index;
4422                continue;
4423            }
4424        }
4425
4426        let (next_index, char) = next_char(input, index).expect("valid UTF-8 byte index");
4427        if text.is_empty() {
4428            text_start = base_offset + index;
4429        }
4430        text.push(if char == '\0' { '\u{FFFD}' } else { char });
4431        index = next_index;
4432    }
4433
4434    flush_text(&mut nodes, &mut text, text_start, base_offset + input.len());
4435    process_emphasis(nodes, delimiters)
4436}
4437
4438fn parse_shortcode(input: &str, index: usize) -> Option<(usize, String)> {
4439    if input[index..].starts_with("::") {
4440        return None;
4441    }
4442
4443    let mut cursor = index + 1;
4444    while let Some((next, char)) = next_char(input, cursor) {
4445        if char == ':' {
4446            if cursor == index + 1 {
4447                return None;
4448            }
4449            return Some((next, input[index + 1..cursor].into()));
4450        }
4451        if !(char.is_ascii_alphanumeric() || matches!(char, '_' | '-' | '+')) {
4452            return None;
4453        }
4454        cursor = next;
4455    }
4456    None
4457}
4458
4459fn parse_wikilink(
4460    input: &str,
4461    index: usize,
4462    base_offset: usize,
4463    options: &SyntaxOptions,
4464) -> Option<(usize, Inline)> {
4465    let configured_order = if options.constructs.wikilink_title_after_pipe {
4466        WikiLinkLabelOrder::AfterPipe
4467    } else if options.constructs.wikilink_title_before_pipe {
4468        WikiLinkLabelOrder::BeforePipe
4469    } else {
4470        return None;
4471    };
4472    if input.as_bytes().get(index) != Some(&b'[') || input.as_bytes().get(index + 1) != Some(&b'[')
4473    {
4474        return None;
4475    }
4476
4477    let close = find_wikilink_close(input, index + 2)?;
4478    let source = &input[index + 2..close];
4479    if source.is_empty() || source.len() > WIKILINK_MAX_BYTES {
4480        return None;
4481    }
4482
4483    let (target_source, label_source, label_order) =
4484        if let Some(separator) = find_wikilink_separator(source) {
4485            match configured_order {
4486                WikiLinkLabelOrder::AfterPipe => (
4487                    &source[..separator],
4488                    &source[separator + 1..],
4489                    WikiLinkLabelOrder::AfterPipe,
4490                ),
4491                WikiLinkLabelOrder::BeforePipe => (
4492                    &source[separator + 1..],
4493                    &source[..separator],
4494                    WikiLinkLabelOrder::BeforePipe,
4495                ),
4496            }
4497        } else {
4498            (source, source, configured_order)
4499        };
4500
4501    let target = unescape_string(target_source);
4502    if target.is_empty() {
4503        return None;
4504    }
4505    let label = unescape_string(label_source);
4506    let end = close + 2;
4507    Some((
4508        end,
4509        Inline::WikiLink(WikiLink {
4510            meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
4511            target,
4512            label,
4513            label_order,
4514        }),
4515    ))
4516}
4517
4518fn find_wikilink_close(input: &str, start: usize) -> Option<usize> {
4519    let bytes = input.as_bytes();
4520    let mut cursor = start;
4521    while cursor < input.len() {
4522        match bytes[cursor] {
4523            b'\\' => {
4524                cursor += 1;
4525                if cursor < input.len() {
4526                    cursor = next_char(input, cursor)?.0;
4527                }
4528            }
4529            b'\n' | b'\r' => return None,
4530            b']' if bytes.get(cursor + 1) == Some(&b']') => return Some(cursor),
4531            _ => cursor = next_char(input, cursor)?.0,
4532        }
4533    }
4534    None
4535}
4536
4537fn find_wikilink_separator(input: &str) -> Option<usize> {
4538    let bytes = input.as_bytes();
4539    let mut cursor = 0;
4540    while cursor < input.len() {
4541        match bytes[cursor] {
4542            b'\\' => {
4543                cursor += 1;
4544                if cursor < input.len() {
4545                    cursor = next_char(input, cursor)?.0;
4546                }
4547            }
4548            b'|' => return Some(cursor),
4549            _ => cursor = next_char(input, cursor)?.0,
4550        }
4551    }
4552    None
4553}
4554
/// Counts the spaces and tabs at the very end of `input`.
///
/// Only the ASCII space and tab characters are counted; any other character
/// (including other Unicode whitespace) ends the run.
fn trailing_space_count(input: &str) -> usize {
    // Both counted characters are one byte wide, so the number of bytes
    // trimmed equals the number of characters trimmed.
    let without_trailing = input.trim_end_matches(|c: char| matches!(c, ' ' | '\t'));
    input.len() - without_trailing.len()
}
4563
/// Reports whether the last `trailing` bytes of `input` form a hard line
/// break marker.
///
/// A hard line break is two or more spaces immediately before the newline
/// with no intervening tab; a tab anywhere in the trailing whitespace run
/// demotes it to a soft break.
///
/// `trailing` is the length in bytes of the trailing whitespace run. If it is
/// larger than `input` itself the run cannot exist, so the answer is `false`
/// rather than an out-of-bounds panic.
fn is_hard_break_suffix(input: &str, trailing: usize) -> bool {
    if trailing < 2 {
        return false;
    }
    let bytes = input.as_bytes();
    // `checked_sub` guards the slice start against underflow when the caller
    // passes a count longer than the input.
    match bytes.len().checked_sub(trailing) {
        Some(run_start) => bytes[run_start..].iter().all(|byte| *byte == b' '),
        None => false,
    }
}
4574
4575fn parse_image(
4576    input: &str,
4577    index: usize,
4578    base_offset: usize,
4579    options: &SyntaxOptions,
4580    definitions: &[String],
4581    diagnostics: &mut Vec<Diagnostic>,
4582) -> Option<(usize, Inline)> {
4583    let label_start = index + 2;
4584    let label_end = find_link_label_end(input, index + 1)?;
4585    let alt_source = &input[label_start..label_end];
4586    let after_label = label_end + 1;
4587    if input.as_bytes().get(after_label) == Some(&b'(') {
4588        let (close, resource) = parse_link_resource(input, after_label)?;
4589        return Some((
4590            close,
4591            Inline::Image(Image {
4592                meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + close))),
4593                destination: resource.destination,
4594                destination_kind: resource.destination_kind,
4595                title: resource.title,
4596                title_kind: resource.title_kind,
4597                alt: parse_inlines(
4598                    alt_source,
4599                    base_offset + label_start,
4600                    options,
4601                    definitions,
4602                    diagnostics,
4603                ),
4604            }),
4605        ));
4606    }
4607    if input.as_bytes().get(after_label) == Some(&b'[') {
4608        let close = find_reference_label_end(input, after_label)?;
4609        let label = &input[after_label + 1..close];
4610        let identifier = if label.is_empty() { alt_source } else { label };
4611        if definition_exists(definitions, identifier) {
4612            return Some((
4613                close + 1,
4614                Inline::ImageReference(ImageReference {
4615                    meta: NodeMeta::new(Some(Span::new(
4616                        base_offset + index,
4617                        base_offset + close + 1,
4618                    ))),
4619                    identifier: normalize_label(identifier),
4620                    label: identifier.into(),
4621                    kind: if label.is_empty() {
4622                        ReferenceKind::Collapsed
4623                    } else {
4624                        ReferenceKind::Full
4625                    },
4626                    alt: parse_inlines(
4627                        alt_source,
4628                        base_offset + label_start,
4629                        options,
4630                        definitions,
4631                        diagnostics,
4632                    ),
4633                }),
4634            ));
4635        }
4636        // A present `[...]` second label that resolves to no definition is not a
4637        // reference and does not fall back to a shortcut (mirrors parse_link).
4638        return None;
4639    }
4640    // Shortcut image reference `![foo]` (no following `(`/`[`) where `foo` is a
4641    // defined label — mirrors parse_link's shortcut branch.
4642    if definition_exists(definitions, alt_source) {
4643        return Some((
4644            after_label,
4645            Inline::ImageReference(ImageReference {
4646                meta: NodeMeta::new(Some(Span::new(
4647                    base_offset + index,
4648                    base_offset + after_label,
4649                ))),
4650                identifier: normalize_label(alt_source),
4651                label: alt_source.into(),
4652                kind: ReferenceKind::Shortcut,
4653                alt: parse_inlines(
4654                    alt_source,
4655                    base_offset + label_start,
4656                    options,
4657                    definitions,
4658                    diagnostics,
4659                ),
4660            }),
4661        ));
4662    }
4663    None
4664}
4665
4666fn parse_link(
4667    input: &str,
4668    index: usize,
4669    base_offset: usize,
4670    options: &SyntaxOptions,
4671    definitions: &[String],
4672    diagnostics: &mut Vec<Diagnostic>,
4673    context: InlineContext,
4674) -> Option<(usize, Inline)> {
4675    if !context.allow_links {
4676        return None;
4677    }
4678    let label_end = find_link_label_end(input, index)?;
4679    let label_source = &input[index + 1..label_end];
4680    if label_contains_link(label_source, base_offset + index + 1, options, definitions) {
4681        return None;
4682    }
4683    let after_label = label_end + 1;
4684    if input.as_bytes().get(after_label) == Some(&b'(') {
4685        // A present-but-invalid `(...)` resource is not an inline link, but
4686        // CommonMark still resolves `[label]` as a shortcut reference and leaves
4687        // the invalid `(...)` as literal text (links 568) — so fall through to
4688        // the reference branches below instead of bailing out of parse_link.
4689        if let Some((close, resource)) = parse_link_resource(input, after_label) {
4690            return Some((
4691                close,
4692                Inline::Link(Link {
4693                    meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + close))),
4694                    destination: resource.destination,
4695                    destination_kind: resource.destination_kind,
4696                    title: resource.title,
4697                    title_kind: resource.title_kind,
4698                    children: parse_inlines_with_context(
4699                        label_source,
4700                        base_offset + index + 1,
4701                        options,
4702                        definitions,
4703                        diagnostics,
4704                        InlineContext { allow_links: false },
4705                    ),
4706                }),
4707            ));
4708        }
4709    }
4710    if input.as_bytes().get(after_label) == Some(&b'[') {
4711        let close = find_reference_label_end(input, after_label)?;
4712        let label = &input[after_label + 1..close];
4713        let identifier = if label.is_empty() {
4714            label_source
4715        } else {
4716            label
4717        };
4718        if definition_exists(definitions, identifier) {
4719            return Some((
4720                close + 1,
4721                Inline::LinkReference(LinkReference {
4722                    meta: NodeMeta::new(Some(Span::new(
4723                        base_offset + index,
4724                        base_offset + close + 1,
4725                    ))),
4726                    identifier: normalize_label(identifier),
4727                    label: identifier.into(),
4728                    kind: if label.is_empty() {
4729                        ReferenceKind::Collapsed
4730                    } else {
4731                        ReferenceKind::Full
4732                    },
4733                    children: parse_inlines_with_context(
4734                        label_source,
4735                        base_offset + index + 1,
4736                        options,
4737                        definitions,
4738                        diagnostics,
4739                        InlineContext { allow_links: false },
4740                    ),
4741                }),
4742            ));
4743        }
4744        // A present `[...]` second label that resolves to no definition is NOT a
4745        // link, and CommonMark does not fall back to treating the first label as
4746        // a shortcut (`[x][ ]`, `[x][undef]` stay literal). Only a truly absent
4747        // `[...]` reaches the shortcut path below.
4748        return None;
4749    }
4750    if definition_exists(definitions, label_source) {
4751        return Some((
4752            after_label,
4753            Inline::LinkReference(LinkReference {
4754                meta: NodeMeta::new(Some(Span::new(
4755                    base_offset + index,
4756                    base_offset + after_label,
4757                ))),
4758                identifier: normalize_label(label_source),
4759                label: label_source.into(),
4760                kind: ReferenceKind::Shortcut,
4761                children: parse_inlines_with_context(
4762                    label_source,
4763                    base_offset + index + 1,
4764                    options,
4765                    definitions,
4766                    diagnostics,
4767                    InlineContext { allow_links: false },
4768                ),
4769            }),
4770        ));
4771    }
4772    None
4773}
4774
4775fn find_reference_label_end(input: &str, open: usize) -> Option<usize> {
4776    // A reference/definition link label does not nest: it ends at the first
4777    // unescaped `]`, and an unescaped interior `[` disqualifies it.
4778    if input.as_bytes().get(open) != Some(&b'[') {
4779        return None;
4780    }
4781
4782    let mut cursor = open + 1;
4783    while cursor < input.len() {
4784        let (next, char) = next_char(input, cursor)?;
4785        match char {
4786            '\\' => {
4787                cursor = next_char(input, next)
4788                    .map(|(after_escape, _)| after_escape)
4789                    .unwrap_or(next);
4790                continue;
4791            }
4792            '[' => return None,
4793            ']' => {
4794                return reference_label_is_within_limit(&input[open + 1..cursor]).then_some(cursor);
4795            }
4796            _ => {}
4797        }
4798        cursor = next;
4799    }
4800    None
4801}
4802
4803fn label_contains_link(
4804    label_source: &str,
4805    base_offset: usize,
4806    options: &SyntaxOptions,
4807    definitions: &[String],
4808) -> bool {
4809    let mut diagnostics = Vec::new();
4810    let inlines = parse_inlines_with_context(
4811        label_source,
4812        base_offset,
4813        options,
4814        definitions,
4815        &mut diagnostics,
4816        InlineContext::default(),
4817    );
4818    contains_link_inline(&inlines)
4819}
4820
4821fn contains_link_inline(inlines: &[Inline]) -> bool {
4822    inlines.iter().any(|inline| match inline {
4823        Inline::Link(_) | Inline::LinkReference(_) => true,
4824        Inline::Emphasis(node) => contains_link_inline(&node.children),
4825        Inline::Strong(node) => contains_link_inline(&node.children),
4826        Inline::Delete(node) => contains_link_inline(&node.children),
4827        Inline::TextDirective(node) => contains_link_inline(&node.label),
4828        _ => false,
4829    })
4830}
4831
4832fn find_link_label_end(input: &str, open: usize) -> Option<usize> {
4833    if input.as_bytes().get(open) != Some(&b'[') {
4834        return None;
4835    }
4836
4837    let mut depth = 1usize;
4838    let mut cursor = open + 1;
4839    while cursor < input.len() {
4840        let (next, char) = next_char(input, cursor)?;
4841        match char {
4842            '\\' => {
4843                cursor = next_char(input, next)
4844                    .map(|(after_escape, _)| after_escape)
4845                    .unwrap_or(next);
4846                continue;
4847            }
4848            '`' => {
4849                if let Some((end, _)) = parse_code_span(input, cursor) {
4850                    cursor = end;
4851                    continue;
4852                }
4853            }
4854            '<' => {
4855                if let Some(end) = parse_autolink_end(input, cursor) {
4856                    let raw = &input[cursor..end];
4857                    if is_autolink(raw) {
4858                        cursor = end;
4859                        continue;
4860                    }
4861                }
4862                if let Some((end, _)) = parse_html_inline(input, cursor) {
4863                    cursor = end;
4864                    continue;
4865                }
4866            }
4867            '[' => depth += 1,
4868            ']' => {
4869                depth = depth.checked_sub(1)?;
4870                if depth == 0 {
4871                    return Some(cursor);
4872                }
4873            }
4874            _ => {}
4875        }
4876        cursor = next;
4877    }
4878    None
4879}
4880
4881fn parse_text_directive(
4882    input: &str,
4883    index: usize,
4884    base_offset: usize,
4885    options: &SyntaxOptions,
4886    definitions: &[String],
4887    diagnostics: &mut Vec<Diagnostic>,
4888) -> Option<(usize, Inline)> {
4889    if input[index..].starts_with("::") {
4890        return None;
4891    }
4892    if index > 0 {
4893        let previous = input[..index].chars().next_back()?;
4894        if !previous.is_whitespace() && !matches!(previous, '(' | '[' | '{') {
4895            return None;
4896        }
4897    }
4898    let opener_source = &input[index + 1..];
4899    let (name, label_source, attributes, consumed) = match parse_directive_opener(opener_source) {
4900        Some(opener) => opener,
4901        None => {
4902            if directive_opener_looks_malformed(opener_source) {
4903                diagnostics.push(Diagnostic::new(
4904                    DiagnosticSeverity::Error,
4905                    DiagnosticCode::InvalidDirectiveName,
4906                    Span::new(base_offset + index, base_offset + input.len()),
4907                    "text directive opener is malformed",
4908                ));
4909            }
4910            return None;
4911        }
4912    };
4913    let label = label_source
4914        .map(|source| {
4915            parse_inlines(
4916                source,
4917                base_offset + index + 1 + name.len() + 1,
4918                options,
4919                definitions,
4920                diagnostics,
4921            )
4922        })
4923        .unwrap_or_default();
4924    Some((
4925        index + 1 + consumed,
4926        Inline::TextDirective(TextDirective {
4927            meta: NodeMeta::new(Some(Span::new(
4928                base_offset + index,
4929                base_offset + index + 1 + consumed,
4930            ))),
4931            name,
4932            label,
4933            attributes,
4934        }),
4935    ))
4936}
4937
4938fn parse_directive_opener(
4939    input: &str,
4940) -> Option<(String, Option<&str>, Vec<DirectiveAttribute>, usize)> {
4941    let mut index = 0;
4942    while let Some((next, char)) = next_char(input, index) {
4943        if char.is_ascii_alphanumeric() || char == '_' || char == '-' {
4944            index = next;
4945        } else {
4946            break;
4947        }
4948    }
4949    let name = &input[..index];
4950    if !is_directive_name(name) {
4951        return None;
4952    }
4953
4954    let mut label = None;
4955    let mut attributes = Vec::new();
4956    let mut consumed = index;
4957    if input.as_bytes().get(consumed) == Some(&b'[') {
4958        let close = find_link_label_end(input, consumed)?;
4959        label = Some(&input[consumed + 1..close]);
4960        consumed = close + 1;
4961    }
4962    if input.as_bytes().get(consumed) == Some(&b'{') {
4963        let close = find_directive_attributes_close(input, consumed)?;
4964        attributes = parse_attributes(&input[consumed + 1..close]);
4965        consumed = close + 1;
4966    }
4967
4968    Some((name.into(), label, attributes, consumed))
4969}
4970
4971fn directive_opener_looks_malformed(input: &str) -> bool {
4972    let mut index = 0;
4973    while let Some((next, char)) = next_char(input, index) {
4974        if char.is_ascii_alphanumeric() || char == '_' || char == '-' {
4975            index = next;
4976        } else {
4977            break;
4978        }
4979    }
4980    index > 0
4981        && is_directive_name(&input[..index])
4982        && matches!(input.as_bytes().get(index), Some(b'[' | b'{'))
4983}
4984
/// Finds the `}` that closes the attribute block opening with `{` at byte
/// `open`, returning its byte offset.
///
/// A `}` inside a single- or double-quoted value does not close the block,
/// and a backslash makes the byte after it inert (both inside and outside
/// quotes). Returns `None` when `open` is not a `{` or no closing brace is
/// found.
fn find_directive_attributes_close(input: &str, open: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    if bytes.get(open) != Some(&b'{') {
        return None;
    }

    let body_start = open + 1;
    let mut open_quote: Option<u8> = None;
    let mut skip_next = false;
    for (offset, &byte) in bytes[body_start..].iter().enumerate() {
        if skip_next {
            // This byte was escaped by the preceding backslash.
            skip_next = false;
            continue;
        }
        match (byte, open_quote) {
            (b'\\', _) => skip_next = true,
            // Inside a quoted value only the matching quote is significant.
            (_, Some(delimiter)) => {
                if byte == delimiter {
                    open_quote = None;
                }
            }
            (b'"' | b'\'', None) => open_quote = Some(byte),
            (b'}', None) => return Some(body_start + offset),
            _ => {}
        }
    }
    None
}
5022
5023fn parse_attributes(input: &str) -> Vec<DirectiveAttribute> {
5024    let mut attributes = Vec::new();
5025    let mut cursor = 0;
5026    while cursor < input.len() {
5027        cursor = skip_spaces(input, cursor);
5028        if cursor >= input.len() {
5029            break;
5030        }
5031
5032        if input.as_bytes().get(cursor) == Some(&b'#') {
5033            let (id, next) = parse_attribute_token(input, cursor + 1);
5034            if !id.is_empty() {
5035                attributes.push(DirectiveAttribute {
5036                    name: "id".into(),
5037                    value: Some(id.into()),
5038                });
5039            }
5040            cursor = next;
5041            continue;
5042        }
5043
5044        if input.as_bytes().get(cursor) == Some(&b'.') {
5045            let (class, next) = parse_attribute_token(input, cursor + 1);
5046            if !class.is_empty() {
5047                attributes.push(DirectiveAttribute {
5048                    name: "class".into(),
5049                    value: Some(class.into()),
5050                });
5051            }
5052            cursor = next;
5053            continue;
5054        }
5055
5056        let (name, next) = parse_attribute_name(input, cursor);
5057        if name.is_empty() {
5058            break;
5059        }
5060        cursor = skip_spaces(input, next);
5061        if input.as_bytes().get(cursor) == Some(&b'=') {
5062            cursor = skip_spaces(input, cursor + 1);
5063            if let Some((value, next)) = parse_attribute_value(input, cursor) {
5064                attributes.push(DirectiveAttribute {
5065                    name: name.into(),
5066                    value: Some(value),
5067                });
5068                cursor = next;
5069            } else {
5070                attributes.push(DirectiveAttribute {
5071                    name: name.into(),
5072                    value: Some(String::new()),
5073                });
5074            }
5075        } else {
5076            attributes.push(DirectiveAttribute {
5077                name: name.into(),
5078                value: None,
5079            });
5080        }
5081    }
5082    attributes
5083}
5084
/// Reads a whitespace-delimited token starting at byte offset `index`.
///
/// Returns the token (possibly empty) and the byte offset just past it, which
/// is either the offset of the first whitespace character or the end of input.
fn parse_attribute_token(input: &str, index: usize) -> (&str, usize) {
    let rest = &input[index..];
    let length = rest.find(char::is_whitespace).unwrap_or(rest.len());
    (&rest[..length], index + length)
}
5095
/// Reads an attribute name starting at byte offset `index`.
///
/// The name ends at the first whitespace character or `=`, or at the end of
/// input. Returns the name (possibly empty) and the byte offset just past it.
fn parse_attribute_name(input: &str, index: usize) -> (&str, usize) {
    let rest = &input[index..];
    let length = rest
        .find(|c: char| c == '=' || c.is_whitespace())
        .unwrap_or(rest.len());
    (&rest[..length], index + length)
}
5106
5107fn parse_attribute_value(input: &str, index: usize) -> Option<(String, usize)> {
5108    let quote = input.as_bytes().get(index).copied();
5109    if matches!(quote, Some(b'"' | b'\'')) {
5110        let quote = quote?;
5111        let mut cursor = index + 1;
5112        while cursor < input.len() {
5113            let (next, char) = next_char(input, cursor)?;
5114            if char as u8 == quote && !is_escaped_at(input, cursor) {
5115                return Some((unescape_ascii_punctuation(&input[index + 1..cursor]), next));
5116            }
5117            cursor = next;
5118        }
5119        return None;
5120    }
5121
5122    let (value, next) = parse_attribute_token(input, index);
5123    Some((
5124        unescape_selected(value, |char| matches!(char, '\\' | '&')),
5125        next,
5126    ))
5127}
5128
/// The pieces of a code span as produced by `parse_code_span`.
struct CodeSpanSource {
    /// The text between the opening and closing backtick runs, exactly as it
    /// appears in the source.
    raw: String,
    /// `raw` after `normalize_code_span` has been applied.
    value: String,
    /// Number of backticks in the opening (and therefore closing) run.
    fence_length: usize,
}
5134
5135fn parse_code_span(input: &str, index: usize) -> Option<(usize, CodeSpanSource)> {
5136    let len = input[index..]
5137        .as_bytes()
5138        .iter()
5139        .take_while(|byte| **byte == b'`')
5140        .count();
5141    let search_start = index + len;
5142    let close = find_code_span_close(input, search_start, len)?;
5143    let raw = &input[search_start..close];
5144    Some((
5145        close + len,
5146        CodeSpanSource {
5147            value: normalize_code_span(raw),
5148            raw: raw.into(),
5149            fence_length: len,
5150        },
5151    ))
5152}
5153
/// Finds the closing backtick run of a code span.
///
/// Scans from byte offset `start` for the first maximal run of backticks whose
/// length is exactly `marker_len`; runs of any other length are skipped whole.
/// Returns the byte offset where the matching run begins, or `None`.
fn find_code_span_close(input: &str, start: usize, marker_len: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    let mut position = start;
    // A backtick is ASCII, so a byte-wise search can never land inside a
    // multi-byte character.
    while let Some(offset) = bytes.get(position..)?.iter().position(|&byte| byte == b'`') {
        let run_start = position + offset;
        let run_len = bytes[run_start..]
            .iter()
            .take_while(|&&byte| byte == b'`')
            .count();
        if run_len == marker_len {
            return Some(run_start);
        }
        position = run_start + run_len;
    }
    None
}
5175
/// Normalizes the raw content of a code span.
///
/// Each line ending (`\r\n`, `\r`, or `\n`) becomes a single space. Then, if
/// the result both starts and ends with a space and is not made up solely of
/// spaces, one space is removed from each end.
fn normalize_code_span(input: &str) -> String {
    let mut text = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();
    while let Some(current) = chars.next() {
        match current {
            '\r' => {
                // Treat `\r\n` as one line ending.
                if chars.peek() == Some(&'\n') {
                    chars.next();
                }
                text.push(' ');
            }
            '\n' => text.push(' '),
            other => text.push(other),
        }
    }

    let has_content = text.chars().any(|c| c != ' ');
    if has_content && text.starts_with(' ') && text.ends_with(' ') {
        String::from(&text[1..text.len() - 1])
    } else {
        text
    }
}
5208
5209fn can_open_delimited(input: &str, index: usize, marker_len: usize) -> bool {
5210    delimiter_flanking(input, index, marker_len).left
5211}
5212
5213fn can_close_delimited(input: &str, index: usize, marker_len: usize) -> bool {
5214    delimiter_flanking(input, index, marker_len).right
5215}
5216
5217fn find_closing_delimiter(
5218    input: &str,
5219    start: usize,
5220    marker: &str,
5221    underscore: bool,
5222) -> Option<usize> {
5223    let marker_len = marker.len();
5224    let mut cursor = start;
5225    let mut nested = 0usize;
5226    while cursor <= input.len() {
5227        let candidate = input[cursor..].find(marker).map(|offset| cursor + offset)?;
5228        if is_escaped_at(input, candidate) {
5229            cursor = candidate + marker_len;
5230            continue;
5231        }
5232        if delimiter_candidate_precedes_link_close(input, start, candidate, marker_len) {
5233            cursor = candidate + marker_len;
5234            continue;
5235        }
5236        if marker_len == 1 && nested == 0 && starts_longer_delimiter_run(input, candidate, marker) {
5237            cursor = candidate + delimiter_run_len(input, candidate, marker);
5238            continue;
5239        }
5240
5241        let can_open = if underscore {
5242            can_open_underscore(input, candidate, marker_len)
5243        } else {
5244            can_open_delimited(input, candidate, marker_len)
5245        };
5246        let can_close = if underscore {
5247            can_close_underscore(input, candidate, marker_len)
5248        } else {
5249            can_close_delimited(input, candidate, marker_len)
5250        };
5251
5252        if can_close {
5253            if nested == 0 {
5254                return Some(candidate);
5255            }
5256            nested -= 1;
5257            cursor = candidate + marker_len;
5258            continue;
5259        }
5260        if can_open {
5261            nested += 1;
5262        }
5263        cursor = candidate + marker_len;
5264    }
5265    None
5266}
5267
5268fn find_single_tilde_delete_close(input: &str, start: usize) -> Option<usize> {
5269    let mut cursor = start;
5270    while cursor < input.len() {
5271        let Some(candidate) = input[cursor..].find('~').map(|index| cursor + index) else {
5272            break;
5273        };
5274        if !is_escaped_at(input, candidate) && single_tilde_can_close_delete(input, candidate) {
5275            return Some(candidate);
5276        }
5277        cursor = candidate + 1;
5278    }
5279    None
5280}
5281
5282fn single_tilde_can_open_delete(input: &str, index: usize) -> bool {
5283    starts_exact_byte_run(input, index, b'~', 1)
5284        && can_open_delimited(input, index, 1)
5285        && !tilde_is_alphanumeric_interior(input, index)
5286}
5287
5288fn single_tilde_can_close_delete(input: &str, index: usize) -> bool {
5289    starts_exact_byte_run(input, index, b'~', 1)
5290        && can_close_delimited(input, index, 1)
5291        && !tilde_is_alphanumeric_interior(input, index)
5292}
5293
5294fn single_tilde_delete_takes_precedence(
5295    options: &SyntaxOptions,
5296    input: &str,
5297    index: usize,
5298) -> bool {
5299    options.constructs.gfm_strikethrough
5300        && options.parse.single_tilde_strikethrough
5301        && single_tilde_can_open_delete(input, index)
5302        && find_single_tilde_delete_close(input, index + 1).is_some()
5303}
5304
/// Reports whether the one-byte marker at `index` has an alphanumeric
/// character directly on both sides.
fn tilde_is_alphanumeric_interior(input: &str, index: usize) -> bool {
    let (before, after) = (&input[..index], &input[index + 1..]);
    matches!(
        (before.chars().next_back(), after.chars().next()),
        (Some(left), Some(right)) if left.is_alphanumeric() && right.is_alphanumeric()
    )
}
5311
5312fn starts_exact_byte_run(input: &str, index: usize, marker: u8, len: usize) -> bool {
5313    input.as_bytes().get(index) == Some(&marker)
5314        && delimiter_byte_run_start(input, index, marker) == index
5315        && delimiter_byte_run_len(input, index, marker) == len
5316}
5317
5318fn delimiter_byte_run_start(input: &str, index: usize, marker: u8) -> usize {
5319    let bytes = input.as_bytes();
5320    let mut start = index;
5321    while start > 0 && bytes[start - 1] == marker && !is_escaped_at(input, start - 1) {
5322        start -= 1;
5323    }
5324    start
5325}
5326
/// Counts the consecutive `marker` bytes starting at byte offset `index`
/// (zero when `index` is out of range or holds a different byte).
fn delimiter_byte_run_len(input: &str, index: usize, marker: u8) -> usize {
    input.as_bytes().get(index..).map_or(0, |tail| {
        tail.iter().take_while(|&&byte| byte == marker).count()
    })
}
5335
5336fn find_simple_inline_close(input: &str, start: usize, marker: u8) -> Option<usize> {
5337    let bytes = input.as_bytes();
5338    let mut cursor = start;
5339    while cursor < input.len() {
5340        match bytes[cursor] {
5341            b'\\' => {
5342                cursor += 1;
5343                if cursor < input.len() {
5344                    cursor = next_char(input, cursor)?.0;
5345                }
5346            }
5347            b'\n' | b'\r' => return None,
5348            byte if byte == marker => return (cursor > start).then_some(cursor),
5349            _ => cursor = next_char(input, cursor)?.0,
5350        }
5351    }
5352    None
5353}
5354
5355fn find_spoiler_close(input: &str, start: usize) -> Option<usize> {
5356    let bytes = input.as_bytes();
5357    let mut cursor = start;
5358    while cursor + 1 < input.len() {
5359        match bytes[cursor] {
5360            b'\\' => {
5361                cursor += 1;
5362                if cursor < input.len() {
5363                    cursor = next_char(input, cursor)?.0;
5364                }
5365            }
5366            b'\n' | b'\r' => return None,
5367            b'|' if bytes.get(cursor + 1) == Some(&b'|')
5368                && cursor > start
5369                && bytes.get(cursor.wrapping_sub(1)) != Some(&b'|') =>
5370            {
5371                return Some(cursor);
5372            }
5373            _ => cursor = next_char(input, cursor)?.0,
5374        }
5375    }
5376    None
5377}
5378
/// Reports whether `index` is the first `marker` of a run containing at least
/// two consecutive markers: `marker` occurs at `index`, does not occur directly
/// before it, and occurs again directly after it.
fn starts_longer_delimiter_run(input: &str, index: usize, marker: &str) -> bool {
    let Some(after_first) = input[index..].strip_prefix(marker) else {
        return false;
    };
    !input[..index].ends_with(marker) && after_first.starts_with(marker)
}
5384
/// Returns the byte length of the run of back-to-back `marker` occurrences
/// that starts at byte offset `index` (zero when `marker` does not occur there).
fn delimiter_run_len(input: &str, index: usize, marker: &str) -> usize {
    let mut tail = &input[index..];
    let mut length = 0;
    while let Some(rest) = tail.strip_prefix(marker) {
        tail = rest;
        length += marker.len();
    }
    length
}
5392
5393fn delimiter_candidate_precedes_link_close(
5394    input: &str,
5395    start: usize,
5396    candidate: usize,
5397    marker_len: usize,
5398) -> bool {
5399    let bytes = input.as_bytes();
5400    if bytes.get(candidate + marker_len) != Some(&b']') {
5401        return false;
5402    }
5403    if !matches!(bytes.get(candidate + marker_len + 1), Some(b'(' | b'[')) {
5404        return false;
5405    }
5406
5407    let mut depth = 0usize;
5408    let mut cursor = start;
5409    while cursor < candidate {
5410        let Some((next, char)) = next_char(input, cursor) else {
5411            break;
5412        };
5413        match char {
5414            '\\' => {
5415                cursor = next_char(input, next)
5416                    .map(|(after_escape, _)| after_escape)
5417                    .unwrap_or(next);
5418                continue;
5419            }
5420            '`' => {
5421                if let Some((end, _)) = parse_code_span(input, cursor) {
5422                    cursor = end;
5423                    continue;
5424                }
5425            }
5426            '[' => depth += 1,
5427            ']' => depth = depth.saturating_sub(1),
5428            _ => {}
5429        }
5430        cursor = next;
5431    }
5432    depth > 0
5433}
5434
5435fn can_open_underscore(input: &str, index: usize, marker_len: usize) -> bool {
5436    let flanking = delimiter_flanking(input, index, marker_len);
5437    flanking.left
5438        && (!flanking.right || flanking.previous.is_some_and(|c| c.is_ascii_punctuation()))
5439}
5440
5441fn can_close_underscore(input: &str, index: usize, marker_len: usize) -> bool {
5442    let flanking = delimiter_flanking(input, index, marker_len);
5443    flanking.right && (!flanking.left || flanking.next.is_some_and(|c| c.is_ascii_punctuation()))
5444}
5445
/// The flanking classification of a delimiter run, as computed by
/// `delimiter_flanking`, together with the characters on either side of it.
#[derive(Copy, Clone)]
struct DelimiterFlanking {
    /// The character directly before the run, if any.
    previous: Option<char>,
    /// The character directly after the run, if any.
    next: Option<char>,
    /// Whether the run is left-flanking.
    left: bool,
    /// Whether the run is right-flanking.
    right: bool,
}
5453
5454fn delimiter_flanking(input: &str, index: usize, marker_len: usize) -> DelimiterFlanking {
5455    let previous = input[..index].chars().next_back();
5456    let next = input[index + marker_len..].chars().next();
5457
5458    let previous_whitespace = previous.is_none_or(char::is_whitespace);
5459    let next_whitespace = next.is_none_or(char::is_whitespace);
5460    let previous_punctuation = previous.is_some_and(is_flanking_punctuation);
5461    let next_punctuation = next.is_some_and(is_flanking_punctuation);
5462
5463    let left = next.is_some()
5464        && !next_whitespace
5465        && !(next_punctuation && !previous_whitespace && !previous_punctuation);
5466    let right = previous.is_some()
5467        && !previous_whitespace
5468        && !(previous_punctuation && !next_whitespace && !next_punctuation);
5469
5470    DelimiterFlanking {
5471        left,
5472        right,
5473        previous,
5474        next,
5475    }
5476}
5477
5478/// Dollar-fenced inline math, GitHub Flavored Markdown dialect.
5479///
5480/// A `$` is a flanking delimiter resolved at scan time (math is not pushed onto
5481/// the emphasis delimiter stack). An opening run of one or two `$` (runs of
5482/// three or more never form math) scans forward for a matching closing run:
5483///
5484/// * single `$`: cannot open if the next char is ASCII whitespace; the closing
5485///   `$` cannot be preceded by ASCII whitespace nor followed by an ASCII digit;
5486///   a `\$` inside is skipped (the backslash is kept verbatim, never a
5487///   delimiter); the close must be a run of exactly one `$`.
5488/// * double `$$`: no flanking and no digit guard; closes on the next run of two
5489///   `$`; content is kept verbatim and may span newlines (this is still an
5490///   inline display span — `$$` flow blocks are handled by `parse_math_block`).
5491///
5492/// The closing run is matched greedily (the nearest valid close wins), which is
5493/// equivalent to emphasis-style "nearest preceding open" because a failed open
5494/// emits a literal `$`/`$$` and the scan resumes after it. Content for the
5495/// single-`$` form is normalized like a code span (line endings → spaces, one
5496/// edge-space strip); the `$$` display form is verbatim. The `` $`…`$ `` code
5497/// form takes precedence.
5498fn parse_math_inline(input: &str, index: usize) -> Option<(usize, String, MathInlineKind)> {
5499    if let Some((end, value)) = parse_math_code_inline(input, index) {
5500        return Some((end, value, MathInlineKind::Code));
5501    }
5502
5503    let bytes = input.as_bytes();
5504    let open_dollars = bytes[index..]
5505        .iter()
5506        .take_while(|byte| **byte == b'$')
5507        .count();
5508    // The maximum math fence length is 2 dollars: a run of three or more never
5509    // opens math.
5510    if open_dollars == 0 || open_dollars > 2 {
5511        return None;
5512    }
5513
5514    let content_start = index + open_dollars;
5515    let close = scan_to_closing_dollar(input, content_start, open_dollars)?;
5516    let content_end = close - open_dollars;
5517    // The span requires `endpos - startpos >= fence_length * 2 + 1`, i.e. at
5518    // least one content byte between the open and close fences.
5519    if content_end <= content_start {
5520        return None;
5521    }
5522
5523    let raw = &input[content_start..content_end];
5524    let value = if open_dollars == 1 {
5525        normalize_math_text(raw)
5526    } else {
5527        raw.into()
5528    };
5529    let dollars = u8::try_from(open_dollars).unwrap_or(u8::MAX);
5530    Some((close, value, MathInlineKind::Dollar { dollars }))
5531}
5532
5533/// Scans for the closing dollar run. `start` is the first content byte
5534/// (just past the opening run); returns the byte offset just past a matching
5535/// closing run of exactly `open_dollars` `$`.
5536fn scan_to_closing_dollar(input: &str, start: usize, open_dollars: usize) -> Option<usize> {
5537    let bytes = input.as_bytes();
5538    // A space immediately after a single opening `$` forbids the open.
5539    if open_dollars == 1 && bytes.get(start).is_some_and(|byte| is_math_space(*byte)) {
5540        return None;
5541    }
5542
5543    let mut cursor = start;
5544    loop {
5545        while cursor < bytes.len() && bytes[cursor] != b'$' {
5546            cursor += 1;
5547        }
5548        if cursor >= bytes.len() {
5549            return None;
5550        }
5551        // `cursor` now points at the first `$` of a potential closing run; the
5552        // char just before it gates the single-`$` flanking and escape rules.
5553        let prev = bytes[cursor - 1];
5554        if open_dollars == 1 && is_math_space(prev) {
5555            return None;
5556        }
5557        if open_dollars == 1 && prev == b'\\' {
5558            // An escaped `\$` is content, not a delimiter: skip this one `$` and
5559            // keep scanning (the backslash stays in the content verbatim).
5560            cursor += 1;
5561            continue;
5562        }
5563        let run = bytes[cursor..]
5564            .iter()
5565            .take(open_dollars)
5566            .take_while(|byte| **byte == b'$')
5567            .count();
5568        // The single-`$` close cannot be followed by an ASCII digit.
5569        if open_dollars == 1 && bytes.get(cursor + run).is_some_and(u8::is_ascii_digit) {
5570            return None;
5571        }
5572        if run == open_dollars {
5573            return Some(cursor + run);
5574        }
5575        cursor += run;
5576    }
5577}
5578
/// Reports whether `byte` is math whitespace: an ASCII tab, line feed,
/// carriage return, or space.
fn is_math_space(byte: u8) -> bool {
    [b'\t', b'\n', b'\r', b' '].contains(&byte)
}
5583
/// Normalizes single-`$` math content the same way code span content is
/// normalized: every line ending (`\r\n`, `\r`, or `\n`) becomes one space, and
/// if the result then both begins and ends with U+0020 without consisting
/// solely of spaces, one space is removed from each end.
fn normalize_math_text(input: &str) -> String {
    // Collapse `\r\n` pairs first so each pair yields a single space.
    let flattened = input
        .replace("\r\n", " ")
        .replace(&['\r', '\n'][..], " ");

    let strip_edges = flattened.starts_with(' ')
        && flattened.ends_with(' ')
        && flattened.contains(|c: char| c != ' ');
    if strip_edges {
        String::from(&flattened[1..flattened.len() - 1])
    } else {
        flattened
    }
}
5619
/// Parses the `` $`…`$ `` inline math form starting at byte offset `index`.
///
/// The content runs from just after the opening `` $` `` to the first
/// `` `$ `` and must be non-empty. Returns the byte offset just past the
/// closing `` `$ `` and the content verbatim.
fn parse_math_code_inline(input: &str, index: usize) -> Option<(usize, String)> {
    const FENCE_LEN: usize = 2;

    let body = input[index..].strip_prefix("$`")?;
    let length = body.find("`$")?;
    if length == 0 {
        return None;
    }
    let end = index + FENCE_LEN + length + FENCE_LEN;
    Some((end, String::from(&body[..length])))
}
5635
5636fn parse_link_resource(input: &str, open: usize) -> Option<(usize, ParsedLinkResource)> {
5637    let bytes = input.as_bytes();
5638    if bytes.get(open) != Some(&b'(') {
5639        return None;
5640    }
5641    let (mut cursor, initial_space) = skip_link_resource_space_with_info(input, open + 1)?;
5642    if bytes.get(cursor) == Some(&b')') {
5643        return Some((
5644            cursor + 1,
5645            ParsedLinkResource {
5646                destination: String::new(),
5647                destination_kind: LinkDestinationKind::Omitted,
5648                title: None,
5649                title_kind: None,
5650            },
5651        ));
5652    }
5653    if initial_space && matches!(bytes.get(cursor), Some(b'"' | b'\'' | b'(')) {
5654        let (title, title_kind, next) = parse_link_title(input, cursor)?;
5655        cursor = skip_link_resource_space(input, next)?;
5656        if bytes.get(cursor) == Some(&b')') {
5657            return Some((
5658                cursor + 1,
5659                ParsedLinkResource {
5660                    destination: String::new(),
5661                    destination_kind: LinkDestinationKind::Omitted,
5662                    title: Some(title),
5663                    title_kind: Some(title_kind),
5664                },
5665            ));
5666        }
5667        return None;
5668    }
5669    let (destination, destination_kind, next) = parse_link_destination(input, cursor)?;
5670    let (after_destination, had_space) = skip_link_resource_space_with_info(input, next)?;
5671    cursor = after_destination;
5672    if bytes.get(cursor) == Some(&b')') {
5673        return Some((
5674            cursor + 1,
5675            ParsedLinkResource {
5676                destination,
5677                destination_kind,
5678                title: None,
5679                title_kind: None,
5680            },
5681        ));
5682    }
5683    if !had_space {
5684        return None;
5685    }
5686
5687    let (title, title_kind, next) = parse_link_title(input, cursor)?;
5688    cursor = skip_link_resource_space(input, next)?;
5689    if bytes.get(cursor) == Some(&b')') {
5690        Some((
5691            cursor + 1,
5692            ParsedLinkResource {
5693                destination,
5694                destination_kind,
5695                title: Some(title),
5696                title_kind: Some(title_kind),
5697            },
5698        ))
5699    } else {
5700        None
5701    }
5702}
5703
5704fn parse_link_destination(
5705    input: &str,
5706    index: usize,
5707) -> Option<(String, LinkDestinationKind, usize)> {
5708    if input.as_bytes().get(index) == Some(&b'<') {
5709        let mut cursor = index + 1;
5710        while cursor < input.len() {
5711            let (next, char) = next_char(input, cursor)?;
5712            if char == '>' && !is_escaped_at(input, cursor) {
5713                return Some((
5714                    unescape_ascii_punctuation(&input[index + 1..cursor]),
5715                    LinkDestinationKind::Angle,
5716                    next,
5717                ));
5718            }
5719            if (char == '<' && !is_escaped_at(input, cursor)) || char == '\n' || char == '\r' {
5720                return None;
5721            }
5722            cursor = next;
5723        }
5724        return None;
5725    }
5726
5727    let mut cursor = index;
5728    let mut depth = 0usize;
5729    while cursor < input.len() {
5730        let (next, char) = next_char(input, cursor)?;
5731        // A bare destination terminates on ASCII space or an ASCII control
5732        // character; Unicode whitespace (e.g. U+00A0) is ordinary. A backslash
5733        // before a space is NOT an escape (only ASCII punctuation is escapable),
5734        // so `\ ` still terminates the destination → `[a](\ b)` is not a link.
5735        if (char == ' ' || char.is_ascii_control()) && depth == 0 {
5736            break;
5737        }
5738        if char == '(' && !is_escaped_at(input, cursor) {
5739            depth += 1;
5740            // CommonMark caps balanced parens in a bare destination at depth 32.
5741            if depth > 32 {
5742                return None;
5743            }
5744        } else if char == ')' && !is_escaped_at(input, cursor) {
5745            if depth == 0 {
5746                break;
5747            }
5748            depth -= 1;
5749        }
5750        cursor = next;
5751    }
5752
5753    if cursor == index || depth > 0 {
5754        None
5755    } else {
5756        Some((
5757            unescape_ascii_punctuation(&input[index..cursor]),
5758            LinkDestinationKind::Bare,
5759            cursor,
5760        ))
5761    }
5762}
5763
5764fn parse_link_title(input: &str, index: usize) -> Option<(String, LinkTitleKind, usize)> {
5765    let opener = input.as_bytes().get(index).copied()?;
5766    let (closer, title_kind) = match opener {
5767        b'"' => ('"', LinkTitleKind::DoubleQuote),
5768        b'\'' => ('\'', LinkTitleKind::SingleQuote),
5769        b'(' => (')', LinkTitleKind::Paren),
5770        _ => return None,
5771    };
5772    let mut cursor = index + 1;
5773    while cursor < input.len() {
5774        let (next, char) = next_char(input, cursor)?;
5775        if char == closer && !is_escaped_at(input, cursor) {
5776            if contains_blank_line(&input[index + 1..cursor]) {
5777                return None;
5778            }
5779            return Some((
5780                unescape_ascii_punctuation(&input[index + 1..cursor]),
5781                title_kind,
5782                next,
5783            ));
5784        }
5785        if opener == b'(' && char == '(' && !is_escaped_at(input, cursor) {
5786            return None;
5787        }
5788        cursor = next;
5789    }
5790    None
5791}
5792
5793fn contains_blank_line(input: &str) -> bool {
5794    if !input.bytes().any(|byte| matches!(byte, b'\n' | b'\r')) {
5795        return false;
5796    }
5797    // A title that merely begins or ends with an EOL is allowed; only an INTERIOR
5798    // blank line (a blank line bounded by content on both sides) is rejected. The
5799    // empty first/last line entries that a leading/trailing newline produces are
5800    // boundary artifacts, not blank lines in the title content.
5801    let lines = collect_lines(input, 0);
5802    let interior = lines.len().saturating_sub(1);
5803    lines
5804        .iter()
5805        .take(interior)
5806        .skip(1)
5807        .any(|line| line.text.trim().is_empty())
5808}
5809
5810fn skip_link_resource_space(input: &str, index: usize) -> Option<usize> {
5811    skip_link_resource_space_with_info(input, index).map(|(index, _)| index)
5812}
5813
/// Skips spaces, tabs, and at most one line ending starting at byte offset
/// `index`.
///
/// `\r\n` counts as a single line ending. Returns the offset of the first byte
/// that was not skipped and whether anything was skipped at all, or `None` as
/// soon as a second line ending is encountered.
fn skip_link_resource_space_with_info(input: &str, mut index: usize) -> Option<(usize, bool)> {
    let bytes = input.as_bytes();
    let entry = index;
    let mut line_break_seen = false;
    while let Some(&byte) = bytes.get(index) {
        let is_line_break = matches!(byte, b'\n' | b'\r');
        if !is_line_break && !matches!(byte, b' ' | b'\t') {
            break;
        }
        if is_line_break {
            if line_break_seen {
                return None;
            }
            line_break_seen = true;
        }
        let crlf = byte == b'\r' && bytes.get(index + 1) == Some(&b'\n');
        index += if crlf { 2 } else { 1 };
    }
    // Every skipped byte advances `index`, so movement means whitespace.
    Some((index, index != entry))
}
5847
5848pub(crate) fn parse_character_reference(input: &str, index: usize) -> Option<(usize, String)> {
5849    let rest = input.get(index..)?;
5850    if let Some(rest) = rest
5851        .strip_prefix("&#x")
5852        .or_else(|| rest.strip_prefix("&#X"))
5853    {
5854        let digits = rest.find(';')?;
5855        if digits == 0 || digits > 6 || !rest[..digits].bytes().all(|byte| byte.is_ascii_hexdigit())
5856        {
5857            return None;
5858        }
5859        let value = u32::from_str_radix(&rest[..digits], 16).ok()?;
5860        return Some((
5861            index + 3 + digits + 1,
5862            character_reference_value(value).into(),
5863        ));
5864    }
5865    if let Some(rest) = rest.strip_prefix("&#") {
5866        let digits = rest.find(';')?;
5867        if digits == 0 || digits > 7 || !rest[..digits].bytes().all(|byte| byte.is_ascii_digit()) {
5868            return None;
5869        }
5870        let value = rest[..digits].parse::<u32>().ok()?;
5871        return Some((
5872            index + 2 + digits + 1,
5873            character_reference_value(value).into(),
5874        ));
5875    }
5876
5877    let name_end = rest.find(';')?;
5878    if name_end == 0 || name_end > 32 {
5879        return None;
5880    }
5881    let name = &rest[1..name_end];
5882    named_character_reference(name).map(|value| (index + name_end + 1, value.into()))
5883}
5884
/// Maps the codepoint of a numeric character reference to the character it
/// decodes to.
///
/// Following the CommonMark reference behavior, `U+0000`, UTF-16 surrogates,
/// and values above the Unicode scalar range all decode to `U+FFFD`. Every
/// other codepoint decodes to itself.
///
/// Two things are intentionally NOT done:
/// - The HTML5 Windows-1252 remapping of C1 bytes is not applied: `&#128;`
///   decodes to `U+0080`, not the Euro sign, as in the CommonMark reference.
/// - Replacement is not extended to C0/C1 controls, DEL, or Unicode
///   noncharacters, as some HTML-oriented decoders do. Leaving them as literal
///   scalars lets the serializer's `&#xNN;` escaping of control characters
///   round-trip through a re-parse. The roundtrip corpus pins only
///   `{0 -> FFFD, 9 -> tab, 10 -> line feed, surrogate -> FFFD,
///   out-of-range -> FFFD}`, and this function matches all of those.
pub(crate) fn character_reference_value(value: u32) -> char {
    match char::from_u32(value) {
        Some(scalar) if scalar != '\0' => scalar,
        // NUL, surrogates, and out-of-range values.
        _ => char::REPLACEMENT_CHARACTER,
    }
}
5908
/// Reports whether the byte at `index` is backslash-escaped, i.e. whether it
/// is immediately preceded by an odd number of consecutive `\` bytes.
pub(crate) fn is_escaped_at(input: &str, index: usize) -> bool {
    let preceding_backslashes = input.as_bytes()[..index]
        .iter()
        .rev()
        .take_while(|byte| **byte == b'\\')
        .count();
    // An even run pairs up into literal backslashes and escapes nothing.
    preceding_backslashes % 2 == 1
}
5919
5920fn parse_definition_destination_title(input: &str) -> Option<ParsedLinkResource> {
5921    let (mut cursor, _) = skip_link_resource_space_with_info(input, 0)?;
5922    let (destination, destination_kind, next) = parse_link_destination(input, cursor)?;
5923    cursor = next;
5924
5925    let (next, had_space) = skip_link_resource_space_with_info(input, cursor)?;
5926    cursor = next;
5927    if cursor >= input.len() {
5928        return Some(ParsedLinkResource {
5929            destination,
5930            destination_kind,
5931            title: None,
5932            title_kind: None,
5933        });
5934    }
5935    if !had_space {
5936        return None;
5937    }
5938
5939    let (title, title_kind, next) = parse_link_title(input, cursor)?;
5940    let after_title = skip_link_resource_space(input, next)?;
5941    (after_title == input.len()).then_some(ParsedLinkResource {
5942        destination,
5943        destination_kind,
5944        title: Some(title),
5945        title_kind: Some(title_kind),
5946    })
5947}
5948
/// True when the first character of `input` after leading whitespace is one of
/// the title openers `"`, `'` or `(`.
fn line_can_start_definition_title(input: &str) -> bool {
    let first = input.trim_start().chars().next();
    matches!(first, Some('"' | '\'' | '('))
}
5953
5954fn unescape_ascii_punctuation(input: &str) -> String {
5955    // Only ASCII punctuation is escapable (`\ ` keeps its backslash).
5956    unescape_selected(input, |char| char.is_ascii_punctuation())
5957}
5958
5959fn unescape_string(input: &str) -> String {
5960    unescape_selected(input, |char| char.is_ascii_punctuation() || char == '&')
5961}
5962
5963fn unescape_selected(input: &str, should_unescape: impl Fn(char) -> bool) -> String {
5964    let mut output = String::new();
5965    let mut cursor = 0;
5966    while cursor < input.len() {
5967        if input.as_bytes().get(cursor) == Some(&b'&') {
5968            if let Some((end, value)) = parse_character_reference(input, cursor) {
5969                output.push_str(&value);
5970                cursor = end;
5971                continue;
5972            }
5973        }
5974        let (next, char) = next_char(input, cursor).expect("valid UTF-8 byte index");
5975        if char == '\\' {
5976            if let Some((after_escape, escaped)) = next_char(input, next) {
5977                if should_unescape(escaped) {
5978                    output.push(escaped);
5979                } else {
5980                    output.push(char);
5981                    output.push(escaped);
5982                }
5983                cursor = after_escape;
5984            } else {
5985                output.push(char);
5986                cursor = next;
5987            }
5988        } else {
5989            output.push(if char == '\0' { '\u{FFFD}' } else { char });
5990            cursor = next;
5991        }
5992    }
5993    output
5994}
5995
/// Appends `line` to `output`, preceded by `\n` unless `output` is still empty.
fn push_line(output: &mut String, line: &str) {
    let separator = if output.is_empty() { "" } else { "\n" };
    output.push_str(separator);
    output.push_str(line);
}
6002
6003fn ensure_line_separator(output: &mut String) {
6004    if !output.is_empty() && !ends_with_line_ending(output) {
6005        output.push('\n');
6006    }
6007}
6008
/// True when the last byte of `input` is `\n` or `\r`.
fn ends_with_line_ending(input: &str) -> bool {
    matches!(input.as_bytes().last(), Some(b'\n' | b'\r'))
}
6012
6013fn flush_text(nodes: &mut Vec<Inline>, text: &mut String, text_start: usize, end: usize) {
6014    if !text.is_empty() {
6015        nodes.push(Inline::Text(Text {
6016            meta: NodeMeta::new(Some(Span::new(text_start, end))),
6017            value: core::mem::take(text),
6018        }));
6019    }
6020}
6021
6022fn gfm_link_label_preserves_url_dot_escape(
6023    text: &str,
6024    escaped: char,
6025    options: &SyntaxOptions,
6026    context: InlineContext,
6027) -> bool {
6028    escaped == '.'
6029        && !context.allow_links
6030        && options.constructs.gfm_autolink_literal
6031        && (text.starts_with("www.") || text.starts_with("http://") || text.starts_with("https://"))
6032}
6033
/// Returns the character starting at byte `index` together with the byte
/// offset just past it, or `None` when `index` is the end of `input`.
///
/// Panics if `index` is out of range or not on a character boundary.
fn next_char(input: &str, index: usize) -> Option<(usize, char)> {
    let rest = &input[index..];
    rest.chars()
        .next()
        .map(|current| (index + current.len_utf8(), current))
}
6038
6039/// A CommonMark "Unicode punctuation character" for emphasis/strong flanking:
6040/// ASCII punctuation plus the non-ASCII Unicode `P*`/`S*` categories. Only the
6041/// flanking classification needs the Unicode set; escape/label logic stays
6042/// ASCII-only via `char::is_ascii_punctuation`.
6043fn is_flanking_punctuation(value: char) -> bool {
6044    value.is_ascii_punctuation() || crate::unicode_punctuation::is_unicode_punctuation(value)
6045}
6046
/// Reduces a reference label to the identifier used for matching. CommonMark
/// compares the RAW label source (backslash escapes and entities are left
/// alone) after trimming, collapsing each internal whitespace run to one
/// space, and Unicode case-folding (`to_uppercase()` followed by
/// `to_lowercase()`). Hence `[foo\!]` does NOT match `[foo!]`, and `[&copy;]`
/// does NOT match `[©]`.
///
/// The serializer's `normalize_reference_label` delegates to this function so
/// that its Shortcut/Collapsed omission oracle cannot drift from the matcher.
pub(crate) fn normalize_label(label: &str) -> String {
    // Capital sharp S (ẞ, U+1E9E) full-casefolds to "ss", but `to_uppercase`
    // leaves it as is because it is already uppercase; without this step `[ẞ]`
    // would fail to match a `[SS]: …` definition (links 540). It is the only
    // character where `to_uppercase().to_lowercase()` differs from the full
    // casefold in a way that matters for label matching.
    let replaced = label.replace('ẞ', "ss");

    let mut collapsed = String::with_capacity(replaced.len());
    for word in replaced.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.to_uppercase().to_lowercase()
}
6069
6070fn definition_exists(definitions: &[String], label: &str) -> bool {
6071    if label.is_empty() || !reference_label_is_within_limit(label) {
6072        return false;
6073    }
6074
6075    let identifier = normalize_label(label);
6076    definitions
6077        .iter()
6078        .any(|definition| definition == &identifier)
6079}
6080
6081fn reference_label_is_within_limit(label: &str) -> bool {
6082    label.chars().take(REFERENCE_LABEL_MAX_CHARS + 1).count() <= REFERENCE_LABEL_MAX_CHARS
6083}
6084
6085fn trim_up_to_three_spaces(input: &str) -> Option<&str> {
6086    let (columns, bytes) = leading_indent(input);
6087    if columns <= 3 {
6088        Some(&input[bytes..])
6089    } else {
6090        None
6091    }
6092}
6093
6094fn fence_start(input: &str) -> Option<(FenceMarker, usize)> {
6095    let marker = match input.as_bytes().first()? {
6096        b'`' => FenceMarker::Backtick,
6097        b'~' => FenceMarker::Tilde,
6098        _ => return None,
6099    };
6100    let byte = match marker {
6101        FenceMarker::Backtick => b'`',
6102        FenceMarker::Tilde => b'~',
6103    };
6104    let length = input
6105        .as_bytes()
6106        .iter()
6107        .take_while(|item| **item == byte)
6108        .count();
6109    if length >= 3 {
6110        Some((marker, length))
6111    } else {
6112        None
6113    }
6114}
6115
6116fn fence_close(input: &str, marker: FenceMarker, length: usize) -> bool {
6117    let byte = match marker {
6118        FenceMarker::Backtick => b'`',
6119        FenceMarker::Tilde => b'~',
6120    };
6121    let count = input
6122        .as_bytes()
6123        .iter()
6124        .take_while(|item| **item == byte)
6125        .count();
6126    count >= length && input[count..].trim().is_empty()
6127}
6128
/// Removes an optional closing `#` sequence from heading content.
///
/// Trailing whitespace is always trimmed. A trailing run of `#` is dropped
/// only when it makes up the whole content (yielding `""`) or is preceded by a
/// space or tab; otherwise the hashes stay as part of the content.
fn trim_closing_hashes(input: &str) -> &str {
    let content = input.trim_end();
    let without_hashes = content.trim_end_matches('#');

    if without_hashes.len() == content.len() {
        // No trailing hashes at all.
        return content;
    }
    if without_hashes.is_empty() {
        // The content is nothing but hashes.
        return "";
    }

    match without_hashes.as_bytes().last() {
        Some(b' ' | b'\t') => without_hashes.trim_end(),
        // Hashes glued to the text (e.g. `foo#`) are literal.
        _ => content,
    }
}
6146
6147fn list_marker_info(input: &str) -> Option<ListMarkerInfo<'_>> {
6148    let trimmed = trim_up_to_three_spaces(input)?;
6149    let indent = input.len() - trimmed.len();
6150    let bytes = trimmed.as_bytes();
6151    match bytes.first()? {
6152        b'-' | b'*' | b'+' if is_list_padding_byte(bytes.get(1).copied()) => {
6153            let delimiter = match bytes[0] {
6154                b'-' => ListDelimiter::Dash,
6155                b'*' => ListDelimiter::Asterisk,
6156                _ => ListDelimiter::Plus,
6157            };
6158            let (content_offset, content_indent) = list_content_offset(trimmed, 1, indent);
6159            Some(ListMarkerInfo {
6160                ordered: false,
6161                start: None,
6162                delimiter,
6163                indent,
6164                marker_len: 1,
6165                content_indent,
6166                content: &trimmed[content_offset..],
6167            })
6168        }
6169        byte if byte.is_ascii_digit() => {
6170            let mut end = 0;
6171            while bytes.get(end).is_some_and(|byte| byte.is_ascii_digit()) {
6172                end += 1;
6173            }
6174            if end > 9 {
6175                return None;
6176            }
6177            let delimiter = match bytes.get(end)? {
6178                b'.' => ListDelimiter::Period,
6179                b')' => ListDelimiter::Paren,
6180                _ => return None,
6181            };
6182            if !is_list_padding_byte(bytes.get(end + 1).copied()) {
6183                return None;
6184            }
6185            let start = trimmed[..end].parse().ok()?;
6186            let marker_len = end + 1;
6187            let (content_offset, content_indent) = list_content_offset(trimmed, marker_len, indent);
6188            Some(ListMarkerInfo {
6189                ordered: true,
6190                start: Some(start),
6191                delimiter,
6192                indent,
6193                marker_len,
6194                content_indent,
6195                content: &trimmed[content_offset..],
6196            })
6197        }
6198        _ => None,
6199    }
6200}
6201
/// Locates where a list item's content starts.
///
/// `input` begins with the marker (`marker_len` bytes) and `indent` is the
/// column the marker sits at. Returns `(byte offset of the content within
/// `input`, content indent column)`.
fn list_content_offset(input: &str, marker_len: usize, indent: usize) -> (usize, usize) {
    let marker_end_column = indent + marker_len;
    // Content indent when exactly one column of padding is assumed.
    let minimal_indent = marker_end_column + 1;

    let padding = match input.as_bytes().get(marker_len..) {
        Some(rest) if !rest.is_empty() => rest,
        // Nothing at all follows the marker.
        _ => return (marker_len, minimal_indent),
    };

    let mut column = marker_end_column;
    let mut consumed = 0;
    for byte in padding {
        column = match *byte {
            b' ' => column + 1,
            b'\t' => column + (4 - (column % 4)),
            _ => break,
        };
        consumed += 1;
    }
    let content_offset = marker_len + consumed;

    // Marker followed only by whitespace: an empty item whose first line is
    // blank. CommonMark §5.2 pins its content indent to marker width + 1 no
    // matter how many trailing spaces there are, so content on the following
    // line indented one column past the marker still belongs to the item.
    if content_offset >= input.len() {
        return (content_offset, minimal_indent);
    }

    match column - marker_end_column {
        1..=4 => (content_offset, column),
        // No padding, or five-plus columns: only one column counts as padding.
        _ => (marker_len + 1, minimal_indent),
    }
}
6232
6233fn list_marker_first_content<'a>(input: &'a str, marker: ListMarkerInfo<'a>) -> Cow<'a, str> {
6234    let Some(trimmed) = trim_up_to_three_spaces(input) else {
6235        return Cow::Borrowed(marker.content);
6236    };
6237    let after_marker = &trimmed[marker.marker_len..];
6238    if after_marker.starts_with('\t') {
6239        strip_leading_indent_columns_from(after_marker, 1, marker.indent + marker.marker_len)
6240    } else {
6241        Cow::Borrowed(marker.content)
6242    }
6243}
6244
/// True for what may follow a list marker: a space, a tab, or nothing (`None`,
/// i.e. the end of the line).
fn is_list_padding_byte(byte: Option<u8>) -> bool {
    match byte {
        None => true,
        Some(value) => value == b' ' || value == b'\t',
    }
}
6248
6249fn same_list_marker(left: ListMarkerInfo<'_>, right: ListMarkerInfo<'_>) -> bool {
6250    // CommonMark §5.3: list items belong to the same list when they share a
6251    // bullet character or ordered delimiter. Indentation does not enter into
6252    // it — `- foo\n - bar\n  - baz` is one four-item bullet list, not three.
6253    left.ordered == right.ordered && left.delimiter == right.delimiter
6254}
6255
6256/// Whether `input` begins a *sibling* item of the current list item.
6257///
6258/// A same-delimiter marker is a sibling only when it is not indented far enough
6259/// to nest inside the current item — i.e. its indent is less than the item's
6260/// `content_indent`. A marker indented at or beyond the content start belongs to
6261/// a sublist within the item and is consumed as item content instead.
6262fn sibling_list_marker_at_line(
6263    input: &str,
6264    first_marker: ListMarkerInfo<'_>,
6265    content_indent: usize,
6266) -> bool {
6267    list_marker_info(input).is_some_and(|candidate| {
6268        same_list_marker(first_marker, candidate) && candidate.indent < content_indent
6269    })
6270}
6271
6272/// Whether `input` begins a list marker belonging to the same list as
6273/// `first_marker` (same ordered/unordered kind and delimiter). Used to tell a
6274/// marker that merely continues the current list apart from one that, by
6275/// changing the marker type, starts a new list (CommonMark §5.3).
6276fn same_list_marker_line(input: &str, first_marker: ListMarkerInfo<'_>) -> bool {
6277    list_marker_info(input).is_some_and(|candidate| same_list_marker(first_marker, candidate))
6278}
6279
6280fn next_nonblank_line(lines: &[Line<'_>], mut index: usize) -> usize {
6281    while index < lines.len() && lines[index].text.trim().is_empty() {
6282        index += 1;
6283    }
6284    index
6285}
6286
/// Measures the leading run of spaces and tabs in `input`.
///
/// Returns `(columns, bytes)`: the width of the run in columns, with each tab
/// advancing to the next multiple of four, and its length in bytes.
fn leading_indent(input: &str) -> (usize, usize) {
    let mut columns = 0usize;
    let consumed = input
        .bytes()
        .take_while(|byte| match byte {
            b' ' => {
                columns += 1;
                true
            }
            b'\t' => {
                columns += 4 - (columns % 4);
                true
            }
            _ => false,
        })
        .count();
    (columns, consumed)
}
6300
6301fn leading_indent_columns(input: &str) -> usize {
6302    leading_indent(input).0
6303}
6304
6305/// Removes up to `max_columns` columns of leading whitespace, stopping at the
6306/// first non-space/tab byte (tabs advance to the next 4-column tab stop). A tab
6307/// that straddles the column budget is PARTIALLY consumed: the columns beyond the
6308/// budget are re-emitted as spaces (CommonMark tab-expansion of indentation), so
6309/// the result may be an owned `String`. Whitespace already at/over the budget
6310/// (and any literal tab whose start sits at the budget) is returned verbatim.
6311fn strip_leading_indent_columns(input: &str, max_columns: usize) -> Cow<'_, str> {
6312    strip_leading_indent_columns_from(input, max_columns, 0)
6313}
6314
/// Strips up to `max_columns` columns of leading spaces/tabs from `input`,
/// where the first byte of `input` sits at column `start_column` (this matters
/// only for computing tab stops, which fall on multiples of four).
///
/// Outcomes:
/// - The first non-space/tab byte is reached within the budget: the rest of
///   the input from that byte is borrowed.
/// - A space, or a tab starting at or past the budget, would exceed it: the
///   input from that byte onward is borrowed unchanged.
/// - A tab starts inside the budget but ends past it: an owned string is
///   built consisting of the over-budget columns as spaces, then the following
///   run of spaces/tabs with tabs expanded to spaces, then the remaining text.
/// - The input is exhausted without exceeding the budget: `""`.
fn strip_leading_indent_columns_from(
    input: &str,
    max_columns: usize,
    start_column: usize,
) -> Cow<'_, str> {
    let mut column = start_column;
    let target_column = start_column + max_columns;
    for (index, byte) in input.as_bytes().iter().enumerate() {
        // Column reached after consuming this byte.
        let next = match *byte {
            b' ' => column + 1,
            b'\t' => column + (4 - (column % 4)),
            _ => return Cow::Borrowed(&input[index..]),
        };
        if next > target_column {
            // A tab whose expansion crosses the budget (its start still inside the
            // budget) is split: the over-budget columns survive as spaces.
            if *byte == b'\t' && column < target_column {
                let residual = next - target_column;
                let mut owned = String::with_capacity(residual + input.len() - (index + 1));
                for _ in 0..residual {
                    owned.push(' ');
                }
                // Expand the rest of the whitespace run too, continuing the
                // column count from where the split tab ended.
                let mut rest_column = next;
                let mut rest_index = index + 1;
                while let Some(rest_byte) = input.as_bytes().get(rest_index) {
                    match *rest_byte {
                        b' ' => {
                            owned.push(' ');
                            rest_column += 1;
                            rest_index += 1;
                        }
                        b'\t' => {
                            let width = 4 - (rest_column % 4);
                            for _ in 0..width {
                                owned.push(' ');
                            }
                            rest_column += width;
                            rest_index += 1;
                        }
                        _ => break,
                    }
                }
                owned.push_str(&input[rest_index..]);
                return Cow::Owned(owned);
            }
            // Budget already used up: keep this byte and everything after it.
            return Cow::Borrowed(&input[index..]);
        }
        column = next;
    }
    Cow::Borrowed("")
}
6366
6367fn strip_list_continuation(input: &str, content_indent: usize, list_indent: usize) -> Cow<'_, str> {
6368    let (indent_columns, indent_bytes) = leading_indent(input);
6369    if indent_columns >= content_indent {
6370        // Remove exactly `content_indent` columns. A tab straddling that budget
6371        // is split: the columns past the budget survive as spaces (CommonMark
6372        // tab expansion of list-item indentation), so a `\t`-only line inside a
6373        // 2-column item keeps the residual two spaces instead of vanishing.
6374        strip_leading_indent_columns(input, content_indent)
6375    } else if indent_columns > list_indent {
6376        Cow::Borrowed(&input[indent_bytes..])
6377    } else {
6378        Cow::Borrowed(trim_ascii_start(input))
6379    }
6380}
6381
6382fn take_task_marker_from_children(children: &mut [Block]) -> Option<bool> {
6383    let Some(Block::Paragraph(paragraph)) = children.first_mut() else {
6384        return None;
6385    };
6386    take_task_marker_from_inlines(&mut paragraph.children)
6387}
6388
/// Detects a task-list marker (`[ ]`, `[x]`, `[X]`) at the start of `inlines`,
/// removes it, and returns whether it was checked. Returns `None` (leaving
/// `inlines` untouched) when the first inline is not text or no marker shape
/// matches.
///
/// Three shapes are tried in order:
/// 1. The marker plus a following space/tab is a prefix of the first text
///    node, and something follows it (more text in that node or later
///    content): the prefix is stripped.
/// 2. The first text node is just the marker, followed by a soft break and
///    further content: both the text node and the soft break are removed.
/// 3. The marker is split across a soft break — the first text node is `[`,
///    then a soft break, then a text node starting with the closing part:
///    the first two nodes and the closing prefix are removed.
fn take_task_marker_from_inlines(inlines: &mut Vec<Inline>) -> Option<bool> {
    let Some(Inline::Text(text)) = inlines.first() else {
        return None;
    };
    // Cloned so the value can be inspected while `inlines` is mutated below.
    let first = text.value.clone();

    // Shape 1: marker and separator inside the first text node.
    if let Some((checked, consumed)) = task_marker_inline_prefix(&first) {
        if !first[consumed..].is_empty() || inlines_have_content_after(inlines, 1) {
            remove_text_prefix(inlines, consumed);
            return Some(checked);
        }
    }

    // Shape 2: marker alone on the first line, content after the soft break.
    if let Some(checked) = task_marker_at_text_end(&first) {
        if inlines
            .get(1)
            .is_some_and(|inline| matches!(inline, Inline::SoftBreak(_)))
            && inlines_have_content_after(inlines, 2)
        {
            // Remove the higher index first so index 0 stays valid.
            inlines.remove(1);
            inlines.remove(0);
            return Some(checked);
        }
    }

    // Shape 3: `[` on the first line, the rest of the marker on the next.
    if task_marker_split_open(&first)
        && inlines
            .get(1)
            .is_some_and(|inline| matches!(inline, Inline::SoftBreak(_)))
    {
        let Some(Inline::Text(next)) = inlines.get(2) else {
            return None;
        };
        if let Some((checked, consumed)) = task_marker_split_close_prefix(&next.value) {
            if !next.value[consumed..].is_empty() || inlines_have_content_after(inlines, 3) {
                inlines.remove(1);
                inlines.remove(0);
                // The former third node is now first; drop the closing part.
                remove_text_prefix(inlines, consumed);
                return Some(checked);
            }
        }
    }

    None
}
6434
6435fn task_marker_inline_prefix(input: &str) -> Option<(bool, usize)> {
6436    let start = leading_trim_bytes(input);
6437    let rest = &input[start..];
6438    let checked = task_marker_checked(rest)?;
6439    let after_marker = start + 3;
6440    match input.as_bytes().get(after_marker) {
6441        Some(b' ' | b'\t') => Some((checked, after_marker + 1)),
6442        _ => None,
6443    }
6444}
6445
6446fn task_marker_at_text_end(input: &str) -> Option<bool> {
6447    let start = leading_trim_bytes(input);
6448    let rest = &input[start..];
6449    let checked = task_marker_checked(rest)?;
6450    if rest.len() == 3 {
6451        Some(checked)
6452    } else {
6453        None
6454    }
6455}
6456
6457fn task_marker_split_open(input: &str) -> bool {
6458    let start = leading_trim_bytes(input);
6459    input[start..] == *"["
6460}
6461
/// Matches the closing part of a task marker at the start of `input`.
///
/// `]` followed by a space or tab gives `(false, 2)`; `x]` or `X]` followed by
/// a space or tab gives `(true, 3)`. The second number is the byte length of
/// the matched prefix. Anything else is `None`.
fn task_marker_split_close_prefix(input: &str) -> Option<(bool, usize)> {
    let bytes = input.as_bytes();
    let first = *bytes.first()?;
    let second = *bytes.get(1)?;
    match (first, second, bytes.get(2).copied()) {
        (b']', b' ' | b'\t', _) => Some((false, 2)),
        (b'x' | b'X', b']', Some(b' ' | b'\t')) => Some((true, 3)),
        _ => None,
    }
}
6470
/// Classifies a task marker at the start of `input`: `[ ]` is unchecked
/// (`Some(false)`), `[x]` and `[X]` are checked (`Some(true)`), anything else
/// is `None`.
fn task_marker_checked(input: &str) -> Option<bool> {
    match input.as_bytes().get(..3)? {
        b"[ ]" => Some(false),
        b"[x]" | b"[X]" => Some(true),
        _ => None,
    }
}
6480
6481fn remove_text_prefix(inlines: &mut Vec<Inline>, consumed: usize) {
6482    if let Some(Inline::Text(text)) = inlines.first_mut() {
6483        text.value = text.value[consumed..].into();
6484        if text.value.is_empty() {
6485            inlines.remove(0);
6486        }
6487    }
6488}
6489
6490fn inlines_have_content_after(inlines: &[Inline], start: usize) -> bool {
6491    inlines.iter().skip(start).any(|inline| match inline {
6492        Inline::Text(text) => !text.value.is_empty(),
6493        Inline::SoftBreak(_) | Inline::LineBreak(_) => false,
6494        _ => true,
6495    })
6496}
6497
6498fn update_list_item_fence(line: &str, open_fence: &mut Option<(FenceMarker, usize)>) {
6499    let Some(trimmed) = trim_up_to_three_spaces(line) else {
6500        return;
6501    };
6502    if let Some((marker, length)) = *open_fence {
6503        if fence_close(trimmed, marker, length) {
6504            *open_fence = None;
6505        }
6506        return;
6507    }
6508    if let Some((marker, length)) = fence_start(trimmed) {
6509        *open_fence = Some((marker, length));
6510    }
6511}
6512
/// Strips leading spaces and tabs (and nothing else) from `input`.
fn trim_ascii_start(input: &str) -> &str {
    input.trim_start_matches(&[' ', '\t'][..])
}
6516
6517fn leading_trim_bytes(input: &str) -> usize {
6518    input.len() - trim_ascii_start(input).len()
6519}
6520
6521fn parse_table_delimiter(input: &str, spoiler: bool) -> Option<Vec<TableAlignment>> {
6522    let cells = split_table_row(input, spoiler);
6523    if cells.is_empty() {
6524        return None;
6525    }
6526    let mut alignments = Vec::new();
6527    for cell in cells {
6528        alignments.push(table_delimiter_alignment(cell.trim())?);
6529    }
6530    Some(alignments)
6531}
6532
6533// A delimiter cell is `:?` `-`+ `:?` once trimmed: colons only at the
6534// boundaries, the dashes contiguous, no interior space or colon.
6535fn table_delimiter_alignment(cell: &str) -> Option<TableAlignment> {
6536    let bytes = cell.as_bytes();
6537    let mut cursor = 0;
6538    let left = bytes.first() == Some(&b':');
6539    if left {
6540        cursor += 1;
6541    }
6542    let dash_start = cursor;
6543    while bytes.get(cursor) == Some(&b'-') {
6544        cursor += 1;
6545    }
6546    if cursor == dash_start {
6547        return None;
6548    }
6549    let right = bytes.get(cursor) == Some(&b':');
6550    if right {
6551        cursor += 1;
6552    }
6553    if cursor != bytes.len() {
6554        return None;
6555    }
6556    Some(match (left, right) {
6557        (true, true) => TableAlignment::Center,
6558        (true, false) => TableAlignment::Left,
6559        (false, true) => TableAlignment::Right,
6560        (false, false) => TableAlignment::None,
6561    })
6562}
6563
6564/// Normalizes a table line's leading indentation: when indented code is enabled
6565/// a four-space indent would start a code block, so up to three leading spaces
6566/// are trimmed and four or more disqualifies the line.
6567fn table_indent_line(input: &str, indented_code: bool) -> Option<&str> {
6568    if indented_code {
6569        trim_up_to_three_spaces(input)
6570    } else {
6571        Some(input)
6572    }
6573}
6574
// Reports whether the backtick run of `length` starting at `start` is closed
// by a later run of exactly the same length in `input`. The table row scanner
// keeps treating unescaped pipes as cell boundaries regardless; this state is
// used only to keep extension syntax such as spoilers from being recognized
// inside a code span.
fn backtick_run_has_close(input: &str, start: usize, length: usize) -> bool {
    let bytes = input.as_bytes();
    let mut position = start + length;
    while let Some(&byte) = bytes.get(position) {
        if byte != b'`' {
            position += 1;
            continue;
        }
        let run = bytes[position..]
            .iter()
            .take_while(|candidate| **candidate == b'`')
            .count();
        if run == length {
            return true;
        }
        // A run of a different length cannot close; skip it whole.
        position += run;
    }
    false
}
6599
/// Examines a run of backslashes starting at `cursor`.
///
/// When the run is non-empty and immediately followed by `|`, returns the byte
/// offset of that pipe and whether the run length is odd. Returns `None`
/// otherwise.
fn table_backslash_pipe_run(input: &str, cursor: usize) -> Option<(usize, bool)> {
    let bytes = input.as_bytes();
    let run = bytes
        .get(cursor..)?
        .iter()
        .take_while(|byte| **byte == b'\\')
        .count();
    if run == 0 {
        return None;
    }
    let pipe = cursor + run;
    match bytes.get(pipe) {
        Some(b'|') => Some((pipe, run % 2 == 1)),
        _ => None,
    }
}
6611
/// Splits one table row into its raw cell strings.
///
/// The row is trimmed, then scanned left to right. An unescaped `|` outside an
/// open spoiler ends the current cell. Backslash runs before a pipe, backtick
/// runs, and (when `spoiler` is set) `||` spoiler delimiters are copied into
/// the current cell instead of splitting it. The empty cells produced by a
/// leading or trailing border pipe are dropped. Cells are returned untrimmed.
fn split_table_row(input: &str, spoiler: bool) -> Vec<String> {
    let trimmed = input.trim();
    let mut cells = Vec::new();
    let mut cell = String::new();
    let mut cursor = 0;
    // Length of the backtick run that opened the current code span, if any.
    let mut code_fence = None;
    let mut spoiler_open = false;
    // Byte offset just past the most recent genuine cell-delimiter pipe. When the
    // scan ends with only whitespace after it, that pipe was a trailing border and
    // the empty leftover cell is dropped (rather than blindly trusting that the
    // line ends with `|`, which mis-fires on a spoiler-close `||` or a code-span
    // pipe — see tbl-4).
    let mut trailing_delimiter_end = None;

    while cursor < trimmed.len() {
        let (next, char) = next_char(trimmed, cursor).expect("valid UTF-8 byte index");
        // GitHub/cmark-gfm treats an odd backslash run before `|` as a literal
        // cell-content pipe, but an even run leaves the pipe as a delimiter. Keep
        // the original run before an even delimiter so the inline parser resolves
        // the visible backslashes correctly.
        if char == '\\' {
            if let Some((pipe, escaped)) = table_backslash_pipe_run(trimmed, cursor) {
                if escaped {
                    // Drop the one backslash that escapes the pipe; keep the rest.
                    for _ in 0..pipe - cursor - 1 {
                        cell.push('\\');
                    }
                    cell.push('|');
                    cursor = pipe + 1;
                } else {
                    for _ in 0..pipe - cursor {
                        cell.push('\\');
                    }
                    // Leave the cursor on the pipe so it is handled as a delimiter.
                    cursor = pipe;
                }
                continue;
            }
        }
        // Backticks are never escapable, so a preceding backslash does not block a
        // code-span boundary (a `\` directly before a closing backtick is content,
        // not an escape — see tbl-3).
        if char == '`' {
            let length = trimmed[cursor..]
                .as_bytes()
                .iter()
                .take_while(|byte| **byte == b'`')
                .count();
            if code_fence == Some(length) {
                code_fence = None;
            } else if code_fence.is_none() && backtick_run_has_close(trimmed, cursor, length) {
                code_fence = Some(length);
            }
            cell.push_str(&trimmed[cursor..cursor + length]);
            cursor += length;
            continue;
        }

        // Inside a code span `||` is plain content, never a spoiler delimiter.
        if spoiler
            && char == '|'
            && trimmed.as_bytes().get(cursor + 1) == Some(&b'|')
            && code_fence.is_some()
        {
            cell.push_str("||");
            cursor += 2;
            continue;
        }

        // Outside a code span an unescaped `||` may open or close a spoiler.
        if spoiler
            && char == '|'
            && trimmed.as_bytes().get(cursor + 1) == Some(&b'|')
            && code_fence.is_none()
            && !is_escaped_at(trimmed, cursor)
        {
            let closes_spoiler =
                spoiler_open && trimmed.as_bytes().get(cursor.wrapping_sub(1)) != Some(&b'|');
            let opens_spoiler = !spoiler_open
                && trimmed.as_bytes().get(cursor + 2) != Some(&b'|')
                && find_spoiler_close(trimmed, cursor + 2).is_some();
            if closes_spoiler || opens_spoiler {
                spoiler_open = opens_spoiler;
                cell.push_str("||");
                cursor += 2;
                continue;
            }
        }

        if char == '|' && !spoiler_open && !is_escaped_at(trimmed, cursor) {
            cells.push(core::mem::take(&mut cell));
            // A delimiter ends the cell; spoiler state never spans a cell boundary.
            spoiler_open = false;
            trailing_delimiter_end = Some(next);
        } else {
            cell.push(char);
        }
        cursor = next;
    }
    cells.push(cell);

    // Drop the empty cell created by a leading border pipe.
    // NOTE(review): unlike the trailing check below, this trusts that a row
    // starting with `|` starts with a delimiter. With `spoiler` enabled, a row
    // that opens with a spoiler `||` looks like it would lose its first real
    // cell here — confirm against the table tests.
    if trimmed.starts_with('|') {
        cells.remove(0);
    }
    // Drop the empty cell created by a trailing border pipe: the last genuine
    // delimiter must sit at the very end (only whitespace after it).
    if let Some(end) = trailing_delimiter_end {
        if trimmed[end..].trim().is_empty() {
            cells.pop();
        }
    }
    cells
}
6721
6722fn table_can_start(lines: &[Line<'_>], index: usize, options: &SyntaxOptions) -> bool {
6723    if !options.constructs.gfm_table || index + 1 >= lines.len() {
6724        return false;
6725    }
6726    table_can_start_source(
6727        lines[index].text,
6728        lines[index + 1].text,
6729        options.constructs.indented_code,
6730        options.constructs.spoiler,
6731    )
6732}
6733
6734pub(crate) fn gfm_table_can_start_source(header: &str, delimiter: &str) -> bool {
6735    table_can_start_source(header, delimiter, true, false)
6736}
6737
6738fn table_can_start_source(
6739    header: &str,
6740    delimiter: &str,
6741    indented_code: bool,
6742    spoiler: bool,
6743) -> bool {
6744    let Some(delimiter) = table_indent_line(delimiter, indented_code) else {
6745        return false;
6746    };
6747    if list_marker_info(delimiter).is_some() {
6748        return false;
6749    }
6750    if !table_has_separator(header, delimiter, spoiler) {
6751        return false;
6752    }
6753    let Some(alignments) = parse_table_delimiter(delimiter, spoiler) else {
6754        return false;
6755    };
6756    split_table_row(header, spoiler).len() == alignments.len()
6757}
6758
6759fn table_has_separator(header: &str, delimiter: &str, spoiler: bool) -> bool {
6760    // GFM makes leading/trailing pipes optional, so `parse_table_delimiter` plus
6761    // the header/alignment column-count check usually suffice. The one exception
6762    // is a single resolved column with no disambiguating syntax: `a\n-\nb` has
6763    // matching one-column shapes yet no pipe and no alignment colon, so it is a
6764    // loose paragraph/setext, not a table. A single column still forms a table
6765    // when a pipe appears in the header/delimiter or the delimiter carries an
6766    // explicit alignment colon (`a\n-:`, `a\n:-:`, …).
6767    let Some(alignments) = parse_table_delimiter(delimiter, spoiler) else {
6768        return true;
6769    };
6770    if alignments.len() == 1 {
6771        return contains_unescaped_pipe(header, spoiler)
6772            || contains_unescaped_pipe(delimiter, spoiler)
6773            || delimiter.contains(':');
6774    }
6775    true
6776}
6777
// Reports whether `input` contains a `|` that acts as a table cell delimiter.
// Called by `table_has_separator` for the single-column disambiguation and,
// per the original note, by `block_quote_table_body_row` to detect a table row
// appearing as a block-quote continuation line (which DOES require a pipe).
//
// A pipe does not count when it is backslash-escaped or, with `spoiler`
// enabled, when it belongs to a `||` spoiler fence or sits inside an open
// spoiler. Backtick runs are tracked only to decide how `||` is treated; a
// single unescaped `|` inside a backtick run still counts.
fn contains_unescaped_pipe(input: &str, spoiler: bool) -> bool {
    let mut cursor = 0;
    // Length of the backtick run that opened the current code span, if any.
    let mut code_fence = None;
    let mut spoiler_open = false;
    while cursor < input.len() {
        let (next, char) = next_char(input, cursor).expect("valid UTF-8 byte index");
        if char == '\\' {
            // NOTE(review): `table_backslash_pipe_run` presumably yields the
            // offset of the pipe after a backslash run and whether that run
            // escapes it — confirm against the helper. An escaped pipe is
            // skipped; an unescaped one is re-examined on the next iteration.
            if let Some((pipe, escaped)) = table_backslash_pipe_run(input, cursor) {
                cursor = if escaped { pipe + 1 } else { pipe };
                continue;
            }
        }
        // Backticks are never escapable; a preceding backslash is code-span content.
        if char == '`' {
            let length = input[cursor..]
                .as_bytes()
                .iter()
                .take_while(|byte| **byte == b'`')
                .count();
            // A run of the same length closes the open span; any run opens one
            // when none is open. Other lengths inside a span are content.
            if code_fence == Some(length) {
                code_fence = None;
            } else if code_fence.is_none() {
                code_fence = Some(length);
            }
            cursor += length;
            continue;
        }
        // Inside a code span a `||` is skipped as a pair: never a spoiler fence
        // and never a delimiter.
        if spoiler
            && char == '|'
            && input.as_bytes().get(cursor + 1) == Some(&b'|')
            && code_fence.is_some()
        {
            cursor += 2;
            continue;
        }
        // Outside code spans an unescaped `||` may open or close a spoiler.
        if spoiler
            && char == '|'
            && input.as_bytes().get(cursor + 1) == Some(&b'|')
            && code_fence.is_none()
            && !is_escaped_at(input, cursor)
        {
            // `wrapping_sub` keeps `cursor == 0` from underflowing; the wrapped
            // index is out of range, so `get` yields `None`.
            let closes_spoiler =
                spoiler_open && input.as_bytes().get(cursor.wrapping_sub(1)) != Some(&b'|');
            // Opening requires a non-pipe right after the fence and a matching
            // close later in the line.
            let opens_spoiler = !spoiler_open
                && input.as_bytes().get(cursor + 2) != Some(&b'|')
                && find_spoiler_close(input, cursor + 2).is_some();
            if closes_spoiler || opens_spoiler {
                spoiler_open = opens_spoiler;
                cursor += 2;
                continue;
            }
        }
        // Anything that falls through as an unescaped `|` outside an open
        // spoiler is a genuine cell delimiter.
        if char == '|' && !spoiler_open && !is_escaped_at(input, cursor) {
            return true;
        }
        cursor = next;
    }
    false
}
6839
6840fn likely_block_start(input: &str, options: &SyntaxOptions) -> bool {
6841    // Block-structure markers (ATX, fences, thematic breaks, list markers, math
6842    // fences, directives, …) only begin a block when indented at most 3 columns.
6843    // At >=4 columns the line is indented code, which never interrupts a
6844    // paragraph, so no marker test should fire.
6845    let Some(trimmed) = trim_up_to_three_spaces(input) else {
6846        return false;
6847    };
6848    trimmed.starts_with('#')
6849        || trimmed.starts_with('>')
6850        || trimmed.starts_with("```")
6851        || trimmed.starts_with("~~~")
6852        || list_marker_can_interrupt_paragraph(input)
6853        || parse_thematic_break(Line {
6854            text: input,
6855            eol: "",
6856            start: 0,
6857            end: input.len(),
6858            end_with_eol: input.len(),
6859            lazy: false,
6860        })
6861        .is_some()
6862        || (options.constructs.html_block && line_starts_interrupting_html_block(input))
6863        || (options.constructs.math_block && math_block_fence_length(trimmed).is_some())
6864        || (options.constructs.directive_container && trimmed.starts_with(":::"))
6865        || (options.constructs.directive_leaf && trimmed.starts_with("::"))
6866        || (options.constructs.footnote_definition && line_starts_footnote_definition(trimmed))
6867}
6868
6869// A GFM footnote definition `[^label]:` is a block boundary: it interrupts a
6870// paragraph and ends a prior footnote's lazy continuation.
6871fn line_starts_footnote_definition(trimmed: &str) -> bool {
6872    trimmed.starts_with("[^")
6873        && find_footnote_definition_label_end(trimmed)
6874            .is_some_and(|close| is_footnote_label(&trimmed[2..close]))
6875}
6876
6877fn list_marker_can_interrupt_paragraph(input: &str) -> bool {
6878    list_marker_info(input).is_some_and(|marker| {
6879        // An empty list item never interrupts a paragraph (CommonMark §5.3):
6880        // `foo\n*` is a single paragraph, not a paragraph plus an empty list.
6881        !marker.content.trim().is_empty() && (!marker.ordered || marker.start == Some(1))
6882    })
6883}
6884
6885// GFM table-body termination is stricter than paragraph interruption: an open
6886// table also ends on a list marker with EMPTY content (`-`, `*`, `1.`), which
6887// `likely_block_start` deliberately ignores for paragraphs. Used only by the
6888// table body loop; `likely_block_start` itself is left untouched.
6889fn table_body_line_ends_table(line: &str, options: &SyntaxOptions) -> bool {
6890    likely_block_start(line, options)
6891        || list_marker_info(line).is_some()
6892        || (options.constructs.html_block && line_starts_html_block(line))
6893}
6894
6895fn line_starts_interrupting_html_block(input: &str) -> bool {
6896    match trim_up_to_three_spaces(input).and_then(html_block_start) {
6897        Some(HtmlBlockKind::UntilBlank) | None => false,
6898        Some(_) => true,
6899    }
6900}
6901
// Finds the first `>` at or after `index` and returns the offset just past it,
// or `None` when there is no `>` in the rest of the input.
fn parse_autolink_end(input: &str, index: usize) -> Option<usize> {
    let offset = input[index..].find('>')?;
    Some(index + offset + 1)
}
6905
6906fn parse_html_inline(input: &str, index: usize) -> Option<(usize, String)> {
6907    let rest = &input[index..];
6908    if rest.starts_with("<!--") {
6909        let end = rest.find("-->")? + 3;
6910        return Some((index + end, rest[..end].into()));
6911    }
6912    if rest.starts_with("<?") {
6913        let end = rest.find("?>")? + 2;
6914        return Some((index + end, rest[..end].into()));
6915    }
6916    if rest.starts_with("<![CDATA[") {
6917        let end = rest.find("]]>")? + 3;
6918        return Some((index + end, rest[..end].into()));
6919    }
6920    if is_declaration_start(rest) {
6921        let end = rest.find('>')? + 1;
6922        return Some((index + end, rest[..end].into()));
6923    }
6924
6925    let (end, _) = parse_html_tag(input, index)?;
6926    Some((end, input[index..end].into()))
6927}
6928
6929fn parse_html_tag(input: &str, index: usize) -> Option<(usize, &str)> {
6930    let bytes = input.as_bytes();
6931    if bytes.get(index) != Some(&b'<') {
6932        return None;
6933    }
6934
6935    let closing = bytes.get(index + 1) == Some(&b'/');
6936    let name_start = index + if closing { 2 } else { 1 };
6937    let first = *bytes.get(name_start)?;
6938    if !first.is_ascii_alphabetic() {
6939        return None;
6940    }
6941
6942    let mut cursor = name_start + 1;
6943    while bytes.get(cursor).is_some_and(|byte| html_name_byte(*byte)) {
6944        cursor += 1;
6945    }
6946    let name = &input[name_start..cursor];
6947
6948    if closing {
6949        cursor = skip_spaces(input, cursor);
6950        if bytes.get(cursor) == Some(&b'>') {
6951            return Some((cursor + 1, name));
6952        }
6953        return None;
6954    }
6955
6956    let mut needs_space = false;
6957    loop {
6958        let before_spaces = cursor;
6959        cursor = skip_spaces(input, cursor);
6960        let had_space = cursor > before_spaces;
6961        match bytes.get(cursor) {
6962            Some(b'>') => return Some((cursor + 1, name)),
6963            Some(b'/') if bytes.get(cursor + 1) == Some(&b'>') => return Some((cursor + 2, name)),
6964            Some(byte) if had_space && html_attribute_name_start(*byte) => {
6965                cursor += 1;
6966                while bytes
6967                    .get(cursor)
6968                    .is_some_and(|byte| html_attribute_name_byte(*byte))
6969                {
6970                    cursor += 1;
6971                }
6972                let after_name = cursor;
6973                let after_spaces = skip_spaces(input, cursor);
6974                if bytes.get(after_spaces) == Some(&b'=') {
6975                    cursor = skip_spaces(input, after_spaces + 1);
6976                    cursor = parse_html_attribute_value(input, cursor)?;
6977                } else {
6978                    cursor = after_name;
6979                }
6980                needs_space = true;
6981            }
6982            Some(_) if needs_space => return None,
6983            _ => return None,
6984        }
6985    }
6986}
6987
// Parses an HTML attribute value starting at `index` and returns the offset
// just past it.
//
// - Quoted (`"…"` or `'…'`): runs to the next matching quote; `None` when the
//   quote is never closed.
// - Unquoted: a non-empty run of bytes containing no ASCII whitespace and none
//   of `"`, `'`, `=`, `<`, `>`, or a backtick; `None` when the run is empty.
fn parse_html_attribute_value(input: &str, index: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    let first = *bytes.get(index)?;

    if matches!(first, b'"' | b'\'') {
        // `index + 1 <= bytes.len()` because the byte at `index` exists.
        return bytes[index + 1..]
            .iter()
            .position(|byte| *byte == first)
            .map(|offset| index + offset + 2);
    }

    let length = bytes[index..]
        .iter()
        .take_while(|byte| {
            !byte.is_ascii_whitespace()
                && !matches!(**byte, b'"' | b'\'' | b'=' | b'<' | b'>' | b'`')
        })
        .count();
    (length > 0).then_some(index + length)
}
7019
// Bytes allowed after the first letter of an HTML tag name: ASCII letters,
// ASCII digits, and `-`.
fn html_name_byte(byte: u8) -> bool {
    matches!(byte, b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
}
7023
// Bytes that may begin an HTML attribute name: an ASCII letter, `_`, or `:`.
fn html_attribute_name_start(byte: u8) -> bool {
    matches!(byte, b'_' | b':' | b'A'..=b'Z' | b'a'..=b'z')
}
7027
// Bytes allowed after the first byte of an HTML attribute name: ASCII letters,
// ASCII digits, `_`, `:`, `.`, and `-`.
fn html_attribute_name_byte(byte: u8) -> bool {
    matches!(
        byte,
        b'_' | b':' | b'.' | b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z'
    )
}
7031
// Advances `index` past any run of spaces, tabs, line feeds, and carriage
// returns and returns the new offset. An `index` at or beyond the end of the
// input is returned unchanged.
fn skip_spaces(input: &str, mut index: usize) -> usize {
    let bytes = input.as_bytes();
    while let Some(b' ' | b'\t' | b'\n' | b'\r') = bytes.get(index) {
        index += 1;
    }
    index
}
7042
7043fn is_autolink(input: &str) -> bool {
7044    let inner = &input[1..input.len() - 1];
7045    is_uri_autolink(inner) || is_email_autolink(inner)
7046}
7047
// Checks the interior of a `<scheme:rest>` autolink. The scheme (everything
// before the first `:`) must be 2–32 bytes long, start with an ASCII letter,
// and continue with ASCII letters, digits, `+`, `.`, or `-`. The remainder may
// be empty but must contain no `<`, `>`, control character, or whitespace.
fn is_uri_autolink(input: &str) -> bool {
    let Some((scheme, remainder)) = input.split_once(':') else {
        return false;
    };
    if !(2..=32).contains(&scheme.len()) {
        return false;
    }

    let scheme_is_valid = scheme.bytes().enumerate().all(|(position, byte)| {
        if position == 0 {
            byte.is_ascii_alphabetic()
        } else {
            byte.is_ascii_alphanumeric() || matches!(byte, b'+' | b'.' | b'-')
        }
    });
    if !scheme_is_valid {
        return false;
    }

    !remainder
        .chars()
        .any(|char| matches!(char, '<' | '>') || char.is_control() || char.is_whitespace())
}
7067
7068fn is_email_autolink(input: &str) -> bool {
7069    if input.chars().any(char::is_whitespace) {
7070        return false;
7071    }
7072    let Some(at) = input.find('@') else {
7073        return false;
7074    };
7075    if at == 0 || at + 1 >= input.len() {
7076        return false;
7077    }
7078    // Angle-bracket `<email>` autolinks use the strict CommonMark domain
7079    // grammar but, unlike the GFM bare form, allow a single (dotless) label.
7080    is_email_local_part(&input[..at]) && is_email_domain(&input[at + 1..], 1)
7081}
7082
// GFM literal-autolink dispatch. Tries, in order: `http(s)://` URLs, `www.`
// URLs, then emails via `parse_literal_email` (per the original note:
// extended-protocol `mailto:`/`xmpp:` emails and bare emails), and finally —
// when `relaxed` is set — cmark-gfm's relaxed `scheme://` form. The first
// three run only when `gfm` is set. Each branch enforces its own
// preceding-character guard and host rules; the trailing trim is shared
// (`autolink_delim`, reached through `autolink_url_end`).
//
// Returns `(end, destination)`: `end` is the offset just past the link text
// and `destination` is the synthesized href (a `http://` prefix is prepended
// for `www.` links); the caller keeps `input[index..end]` as the visible
// original. Returns `None` when no literal autolink starts at `index`.
fn parse_literal_autolink(
    input: &str,
    index: usize,
    gfm: bool,
    relaxed: bool,
) -> Option<(usize, String)> {
    let rest = &input[index..];

    if gfm {
        // `http://` / `https://` URLs. The char before the scheme must not be
        // an ASCII letter (see `literal_scheme_prefix_ok`), so `mmmhttp://…`
        // does not link from `mmmh`.
        if let Some(scheme_len) = rest
            .starts_with("http://")
            .then_some(7)
            .or_else(|| rest.starts_with("https://").then_some(8))
        {
            if !literal_scheme_prefix_ok(input, index) {
                return None;
            }
            let host = &input[index + scheme_len..];
            // A non-empty domain or bracketed IPv6 host is additionally
            // required, so `http://`, `http://#`, `http://$` are not links.
            if !http_literal_host_ok(host) {
                if relaxed {
                    // Fall through: let cmark-gfm's relaxed `scheme://` pass
                    // below decide cases such as a bare `http://` followed by
                    // whitespace.
                } else {
                    return None;
                }
            } else {
                // The URL extent is scanned from the very start (after `://`) and the
                // trailing trim runs over the whole URL. Relaxed mode balances
                // brackets/braces so `[abc]`/`{abc}`/IPv6 hosts stay in the URL.
                let end = autolink_url_end(input, index + scheme_len, index + scheme_len, relaxed);
                // Nothing left after trimming: not a link.
                if end <= index + scheme_len {
                    return None;
                }
                if literal_autolink_suppressed_by_link_label(input, index, end, relaxed, gfm) {
                    return None;
                }
                return Some((end, input[index..end].into()));
            }
        }

        // `www.` URLs, matched case-insensitively (synthesize a `http://`
        // href). The preceding char must be one of `*_~([]`, a space, tab, or
        // line ending, or the literal must start the input (see
        // `literal_www_prefix_ok`).
        if rest
            .as_bytes()
            .get(..4)
            .is_some_and(|prefix| prefix.eq_ignore_ascii_case(b"www."))
        {
            if !literal_www_prefix_ok(input, index) {
                return None;
            }
            // Only validity matters here; the extent comes from the scan below.
            check_domain(rest, false)?;
            let end = autolink_url_end(input, index, index, relaxed);
            // Reject an empty extent. In strict mode also reject an extent
            // trimmed down to at most `www` unless the literal starts a line.
            if end <= index || (!relaxed && end <= index + 3 && !literal_starts_line(input, index))
            {
                return None;
            }
            if literal_autolink_suppressed_by_link_label(input, index, end, relaxed, gfm) {
                return None;
            }
            let mut destination = String::from("http://");
            destination.push_str(&input[index..end]);
            return Some((end, destination));
        }

        if let Some(email) = parse_literal_email(input, index) {
            return Some(email);
        }
    }

    if relaxed {
        // cmark-gfm "relaxed" URL autolinks: a bare `scheme://…` for any scheme
        // (`smb://`, `irc://`, `rdar://`, `we://`, `nex://[…]`, …) or a
        // scheme-less leading `://…` (`://-`). Uses the same preceding-char
        // guard as the http literal (no ASCII letter directly before). No
        // host/domain validation (cmark-gfm is permissive here —
        // `smb:///path` and `://-` both linkify). The extent is balanced.
        if literal_scheme_prefix_ok(input, index) {
            if let Some(after_slashes) = relaxed_scheme_after_slashes(rest) {
                let body_start = index + after_slashes;
                let next = input[body_start..].chars().next();
                // `after_slashes == 3` means the scheme-less `://` form; only
                // that form requires a non-whitespace char right after it.
                if next.is_none_or(|char| char.is_whitespace()) && after_slashes == 3 {
                    return None;
                }
                let end = autolink_url_end(input, body_start, body_start, true);
                if end > index {
                    if literal_autolink_suppressed_by_link_label(input, index, end, relaxed, gfm) {
                        return None;
                    }
                    return Some((end, input[index..end].into()));
                }
            }
        }
    }

    None
}
7190
// If `rest` starts with a relaxed `scheme://` prefix or a scheme-less `://`,
// returns the byte offset (within `rest`) just past the `://`. A scheme is an
// ASCII letter followed by any number of ASCII letters, digits, `+`, `.`, or
// `-`, with no length cap — cmark-gfm's relaxed autolink is permissive.
// Returns `None` for a bare `scheme:` without `//` (that is the
// email/angle-autolink path's job).
fn relaxed_scheme_after_slashes(rest: &str) -> Option<usize> {
    const SEPARATOR: &[u8] = b"://";

    let bytes = rest.as_bytes();
    if bytes.starts_with(SEPARATOR) {
        return Some(SEPARATOR.len());
    }

    let (first, tail) = bytes.split_first()?;
    if !first.is_ascii_alphabetic() {
        return None;
    }
    // The scheme runs until the first byte that is not a scheme byte; that
    // byte must begin the `://` separator.
    let scheme_len = 1 + tail
        .iter()
        .take_while(|byte| byte.is_ascii_alphanumeric() || matches!(**byte, b'+' | b'.' | b'-'))
        .count();
    bytes[scheme_len..]
        .starts_with(SEPARATOR)
        .then_some(scheme_len + SEPARATOR.len())
}
7219
// Preceding-character guard for `http(s)://` (and relaxed `scheme://`)
// literals: the char right before `index` must not be an ASCII letter. Start
// of input always passes. An escaped `<` (`\<http://…`) is just literal text
// before the URL, so the literal still forms (the `<` is not treated as an
// angle-autolink opener).
fn literal_scheme_prefix_ok(input: &str, index: usize) -> bool {
    match input[..index].chars().next_back() {
        Some(previous) => !previous.is_ascii_alphabetic(),
        None => true,
    }
}
7232
// Preceding-character guard for `www.` literals. The char right before
// `index` must be one of the ASCII delimiters `*`, `_`, `~`, `(`, `[`, `]`, or
// ordinary layout whitespace (space, tab, line feed, carriage return). Start
// of input always passes. Unicode whitespace is not a start delimiter for this
// branch.
fn literal_www_prefix_ok(input: &str, index: usize) -> bool {
    input[..index].chars().next_back().map_or(true, |previous| {
        matches!(
            previous,
            '*' | '_' | '~' | '(' | '[' | ']' | ' ' | '\t' | '\n' | '\r'
        )
    })
}
7248
// True when `index` is at the start of the input or the byte directly before
// it is a line feed or carriage return.
fn literal_starts_line(input: &str, index: usize) -> bool {
    match index.checked_sub(1) {
        None => true,
        Some(previous) => matches!(input.as_bytes().get(previous), Some(b'\n' | b'\r')),
    }
}
7256
7257fn literal_autolink_suppressed_by_link_label(
7258    input: &str,
7259    index: usize,
7260    end: usize,
7261    relaxed: bool,
7262    gfm_autolink_literal: bool,
7263) -> bool {
7264    if !has_unclosed_link_label_opener(input, index) {
7265        return false;
7266    }
7267    if input[end..].starts_with("](") && !link_resource_tail_has_close(input, end + 2) {
7268        return true;
7269    }
7270    !relaxed && !gfm_autolink_literal && input.as_bytes().get(end).is_some_and(|byte| *byte == b']')
7271}
7272
7273fn has_unclosed_link_label_opener(input: &str, index: usize) -> bool {
7274    let line_start = input[..index]
7275        .rfind(['\n', '\r'])
7276        .map_or(0, |offset| offset + 1);
7277    let mut depth = 0usize;
7278    let mut cursor = line_start;
7279    while cursor < index {
7280        let Some((next, char)) = next_char(input, cursor) else {
7281            break;
7282        };
7283        match char {
7284            '\\' => {
7285                cursor = next_char(input, next)
7286                    .map(|(after_escape, _)| after_escape)
7287                    .unwrap_or(next);
7288                continue;
7289            }
7290            '[' => depth += 1,
7291            ']' => {
7292                depth = depth.saturating_sub(1);
7293            }
7294            _ => {}
7295        }
7296        cursor = next;
7297    }
7298    depth > 0
7299}
7300
7301fn link_resource_tail_has_close(input: &str, start: usize) -> bool {
7302    let mut cursor = start;
7303    while cursor < input.len() {
7304        let Some((next, char)) = next_char(input, cursor) else {
7305            break;
7306        };
7307        match char {
7308            '\\' => {
7309                cursor = next_char(input, next)
7310                    .map(|(after_escape, _)| after_escape)
7311                    .unwrap_or(next);
7312                continue;
7313            }
7314            '\n' | '\r' => return false,
7315            ')' => return true,
7316            _ => {}
7317        }
7318        cursor = next;
7319    }
7320    false
7321}
7322
7323fn http_literal_host_ok(host: &str) -> bool {
7324    if host.starts_with('[') {
7325        return bracketed_ipv6_host_end(host).is_some();
7326    }
7327    match host.chars().next() {
7328        Some(char) if char.is_ascii() && char.is_ascii_alphanumeric() => {
7329            check_domain(host, true).is_some()
7330        }
7331        Some(char) if !char.is_ascii() && is_valid_hostchar(char) => {
7332            check_domain(host, true).is_some()
7333        }
7334        _ => false,
7335    }
7336}
7337
// Returns the offset just past the first `]` in `host`, provided at least two
// bytes precede it (for a host starting with `[`, that means the brackets are
// not empty). Returns `None` when there is no `]` or it comes too early.
fn bracketed_ipv6_host_end(host: &str) -> Option<usize> {
    match host.find(']') {
        Some(close) if close > 1 => Some(close + 1),
        _ => None,
    }
}
7342
7343// Port of cmark-gfm `is_valid_hostchar`: a host char is valid when it is not a
7344// Unicode space and not a Unicode punctuation character.
7345fn is_valid_hostchar(char: char) -> bool {
7346    !char.is_whitespace() && !crate::unicode_punctuation::is_unicode_punctuation(char)
7347}
7348
// Port of cmark-gfm `check_domain`. Scans the leading host of `data` (up to the
// first non-host char) and returns its byte length, or `None` when invalid.
// Rejects a `_` in either of the last two `.`-separated host segments (unless
// more than 10 dots were counted — a DoS guard). When `allow_short` is false a
// dot is required (the `www.` rule). The URL extent past the host is determined
// by `autolink_url_end`, so the precise length here only gates validity.
//
// cmark walks bytes with `is_valid_hostchar` decoding each char; this walks
// chars directly (UTF-8 safe) over the host prefix, which yields the same
// dot/underscore-segment verdict. A `\` escapes the following char.
//
// Parameters:
// - `data`: text beginning at the host (callers pass the rest of the input).
// - `allow_short`: accept a host with no counted dot.
fn check_domain(data: &str, allow_short: bool) -> Option<usize> {
    // `np`: number of counted dots. `uscore2`: underscores counted in the
    // segment being scanned. `uscore1`: underscores counted in the segment
    // before it (shifted over at every counted dot).
    let mut np = 0usize;
    let mut uscore1 = 0usize;
    let mut uscore2 = 0usize;
    // Byte offset just past the last char accepted as part of the host.
    let mut host_len = 0usize;

    let mut chars = data.char_indices().peekable();
    while let Some((offset, char)) = chars.next() {
        // cmark's accounting loop runs `for (i = 1; i < size - 1; i++)`: it
        // never inspects the first char (offset 0) nor the final char of the
        // chunk. We replicate that — a trailing `_` (e.g. `http://a_`) is not
        // counted, so the link still forms. "Final" here means the final char
        // of `data`, not of the host.
        let account = offset != 0 && chars.peek().is_some();
        match char {
            '\\' => {
                // Escape: consume the next char as a literal host char.
                host_len = offset + char.len_utf8();
                if let Some((next_off, next)) = chars.next() {
                    host_len = next_off + next.len_utf8();
                }
            }
            '_' if account => {
                uscore2 += 1;
                host_len = offset + char.len_utf8();
            }
            '.' if account => {
                // A counted dot closes a segment: shift the underscore counts.
                uscore1 = uscore2;
                uscore2 = 0;
                np += 1;
                host_len = offset + char.len_utf8();
            }
            // `-` anywhere, plus a `_`/`.` in first or final position: part of
            // the host but not counted.
            '_' | '.' | '-' => {
                host_len = offset + char.len_utf8();
            }
            _ => {
                // The first char that cannot appear in a host ends the scan.
                if !is_valid_hostchar(char) {
                    break;
                }
                host_len = offset + char.len_utf8();
            }
        }
    }

    // An underscore in either of the last two segments invalidates the host,
    // except when more than 10 dots were counted (the DoS guard).
    if (uscore1 > 0 || uscore2 > 0) && np <= 10 {
        return None;
    }

    if allow_short || np > 0 {
        Some(host_len)
    } else {
        None
    }
}
7412
// Forward scan from `start` for the URL extent, followed by the trailing trim.
//
// The scan always stops at whitespace, `<`, or a char matched by
// `is_autolink_terminating_control`; `>` and `[` never stop it (per the
// original note, CommonMark allows them inside and the renderer
// percent-encodes them). How `]` and `}` are treated depends on `balanced`;
// see the comment on the depth counters below. A `\` followed by ASCII
// punctuation other than `.` also ends the scan.
//
// The raw extent is then handed to `autolink_delim`, whose trailing trim may
// reach back as far as `trim_from` (the URL start). Returns the final end
// offset.
fn autolink_url_end(input: &str, start: usize, trim_from: usize, balanced: bool) -> usize {
    let bytes = input.as_bytes();
    let mut end = start;
    // Relaxed (cmark-gfm) URL extents balance `[`/`]` and `{`/`}` so an IPv6
    // host `nex://[fe80…]/z` and a balanced `[abc]`/`{abc}` run stay inside the
    // URL while an unbalanced trailing `]`/`}` ends it. Strict (GFM literal)
    // extents do no depth counting: a `]` ends the URL only when no `[` has
    // been seen in the URL so far and the scan is not between backticks; `}`
    // is never special. The two oracle shapes differ on purpose
    // (`autolink_brackets_unbalanced` keeps both `]`;
    // `autolink_relaxed_links_brackets_balanced` keeps one).
    let mut bracket_depth = 0i32;
    let mut curly_depth = 0i32;
    let mut strict_has_open_bracket = false;
    let mut strict_inside_backticks = false;
    for (offset, char) in input[start..].char_indices() {
        if char.is_whitespace() || char == '<' || is_autolink_terminating_control(char) {
            break;
        }
        if balanced {
            match char {
                '[' => bracket_depth += 1,
                ']' => {
                    if bracket_depth > 0 {
                        bracket_depth -= 1;
                    } else {
                        break;
                    }
                }
                '{' => curly_depth += 1,
                '}' => {
                    if curly_depth > 0 {
                        curly_depth -= 1;
                    } else {
                        break;
                    }
                }
                _ => {}
            }
        } else {
            match char {
                '[' => strict_has_open_bracket = true,
                // Every backtick toggles the flag; run lengths are not compared.
                '`' => strict_inside_backticks = !strict_inside_backticks,
                ']' if !strict_has_open_bracket && !strict_inside_backticks => break,
                _ => {}
            }
        }
        // Round-trip guard: when a literal autolink ends (a trailing entity
        // run, punctuation trim, unbalanced `)`, or a `]`/`<` boundary), the
        // text that follows often begins with a char the serializer escapes
        // with a backslash (`\&`, `\[`, `\]`, `\<`, `\>`, `\*`, `\_`, …). The
        // URL scan must stop at such a `\<punct>` so the escape is not re-merged
        // into the destination. A `\` before `.` (or any non-punctuation) is a
        // genuine literal backslash inside the URL (e.g. `www.x.com/a\.`), which
        // the serializer never produces, so it stays part of the URL.
        if char == '\\' {
            if let Some(&next) = bytes.get(start + offset + 1) {
                let next_is_escapable_punct = next.is_ascii_punctuation() && next != b'.';
                if next_is_escapable_punct {
                    break;
                }
            }
        }
        // The char is accepted: extend the raw extent past it.
        end = start + offset + char.len_utf8();
    }
    autolink_delim(input, trim_from, end)
}
7483
// Chars that end a literal autolink's URL scan: U+2066 through U+2069, the
// Unicode directional-isolate controls (LRI, RLI, FSI, PDI).
fn is_autolink_terminating_control(char: char) -> bool {
    ('\u{2066}'..='\u{2069}').contains(&char)
}
7487
// Port of cmark-gfm `autolink_delim`: trim trailing delimiters from the end of
// the URL. A trailing `) ? ! . , : * _ ~ ' "` is trimmed; `)` only when there
// are more `)` than `(` in the link; a trailing `&…;` entity run is excluded
// whole; a lone trailing `;` is trimmed.
//
// Parameters:
// - `start`: lowest offset the trim may reach (the URL start).
// - `end`: raw end of the URL extent; must be >= `start`.
//
// Returns the trimmed end offset, always within `start..=end`.
fn autolink_delim(input: &str, start: usize, mut end: usize) -> usize {
    let bytes = input.as_bytes();
    // Parenthesis census over the whole extent, taken once before trimming.
    let mut opening = 0usize;
    let mut closing = 0usize;
    for &byte in &bytes[start..end] {
        match byte {
            b'(' => opening += 1,
            b')' => closing += 1,
            _ => {}
        }
    }

    // Peel one trailing delimiter (or entity run) per iteration until the last
    // byte is something else or the extent is empty.
    while end > start {
        match bytes[end - 1] {
            b')' => {
                // Balanced (or surplus-open) parentheses: the `)` belongs to
                // the URL and trimming stops here.
                if closing <= opening {
                    break;
                }
                closing -= 1;
                end -= 1;
            }
            b'?' | b'!' | b'.' | b',' | b':' | b'*' | b'_' | b'~' | b'\'' | b'"' => {
                end -= 1;
            }
            b';' => {
                // A trailing hex numeric character reference `&#x…;` is excluded
                // whole. This is the round-trip dual of the serializer, which
                // encodes a text char that would otherwise merge into the URL as
                // a hex entity; no autolink-oracle URL ends in `&#x…;`, so this
                // is conformance-safe (decimal `&#…;` is left intact to match
                // the oracle, which keeps `www.a&#35` in the URL).
                if let Some(amp) = trailing_hex_entity_run_start(bytes, start, end) {
                    end = amp;
                } else {
                    // Walk back over alphanumerics; if they reach a `&`, exclude
                    // the whole `&…;` entity run, otherwise trim just the `;`.
                    // `new_end < end - 1` requires at least one alphanumeric
                    // between the `&` and the `;`.
                    let mut new_end = end - 1;
                    while new_end > start && bytes[new_end - 1].is_ascii_alphanumeric() {
                        new_end -= 1;
                    }
                    if new_end > start && new_end < end - 1 && bytes[new_end - 1] == b'&' {
                        end = new_end - 1;
                    } else {
                        end -= 1;
                    }
                }
            }
            _ => break,
        }
    }
    end
}
7544
// Detects a hex numeric character reference `&#x[hex]+;` sitting at the very
// end of `bytes[start..end]`. On a match the offset of the leading `&` is
// returned; in every other case the result is `None`. The `x` may be either
// case and at least one hex digit is required. Decimal references (`&#…;`)
// are intentionally not recognised, which keeps URLs such as `www.a&#35`
// intact when `autolink_delim` consults this helper.
fn trailing_hex_entity_run_start(bytes: &[u8], start: usize, end: usize) -> Option<usize> {
    if end <= start || bytes[end - 1] != b';' {
        return None;
    }
    let semicolon = end - 1;

    // Count the hex digits immediately before the `;`, never crossing `start`.
    let digit_count = bytes[start..semicolon]
        .iter()
        .rev()
        .take_while(|byte| byte.is_ascii_hexdigit())
        .count();
    let first_digit = semicolon - digit_count;

    // There must be a digit, and room for the three-byte `&#x` introducer
    // inside the span.
    if digit_count == 0 || first_digit < start + 3 {
        return None;
    }

    let ampersand = first_digit - 3;
    matches!(&bytes[ampersand..first_digit], [b'&', b'#', b'x' | b'X']).then_some(ampersand)
}
7569
7570// GFM bare-email literal (and the extended `mailto:`/`xmpp:` protocol forms).
7571// `index` must be the link start: cmark anchors the email at the left edge
7572// found by rewinding from `@` over `[A-Za-z0-9._+-]` (or a `mailto:`/`xmpp:`
7573// scheme), so this only succeeds when the char before `index` is not part of
7574// that left extent.
7575fn parse_literal_email(input: &str, index: usize) -> Option<(usize, String)> {
7576    let rest = &input[index..];
7577    let at = rest.find('@')?;
7578    if at == 0 {
7579        return None;
7580    }
7581    let local = &rest[..at];
7582
7583    // Determine whether this `@` is preceded by an extended protocol scheme
7584    // (`mailto:` / `xmpp:`), which both relaxes the href synthesis and (xmpp)
7585    // allows `/` in the domain.
7586    let (auto_mailto, is_xmpp) = classify_email_local(local);
7587
7588    // Left-boundary guard (autolink-1): the char before `index` must not be a
7589    // local-part continuation char, otherwise the true link starts earlier and
7590    // this position is interior. After a recognized scheme, the scheme's own
7591    // preceding-char rule is what matters.
7592    if !email_left_boundary_ok(input, index, auto_mailto) {
7593        return None;
7594    }
7595
7596    if !email_local_is_valid(local, auto_mailto) {
7597        return None;
7598    }
7599
7600    let domain_start = index + at + 1;
7601    let domain_end = literal_email_domain_end(input, domain_start, is_xmpp)?;
7602    let trimmed = autolink_delim(input, domain_start, domain_end);
7603    if trimmed <= domain_start {
7604        return None;
7605    }
7606
7607    let domain = &input[domain_start..trimmed];
7608    if !is_gfm_email_domain(domain, is_xmpp) {
7609        return None;
7610    }
7611
7612    let mut destination = String::new();
7613    if auto_mailto {
7614        destination.push_str("mailto:");
7615    }
7616    destination.push_str(&input[index..trimmed]);
7617    Some((trimmed, destination))
7618}
7619
7620// Classify the local part for the extended-protocol forms. Returns
7621// `(auto_mailto, is_xmpp)`: `mailto:user` → (false, false); `xmpp:user` →
7622// (false, true); a bare local part → (true, false). The scheme match is
7623// case-insensitive.
7624fn classify_email_local(local: &str) -> (bool, bool) {
7625    if let Some(rest) = strip_ci_prefix(local, "mailto:") {
7626        if !rest.is_empty() {
7627            return (false, false);
7628        }
7629    }
7630    if let Some(rest) = strip_ci_prefix(local, "xmpp:") {
7631        if !rest.is_empty() {
7632            return (false, true);
7633        }
7634    }
7635    (true, false)
7636}
7637
// Strips `prefix` from the front of `input`, comparing ASCII letters without
// regard to case. Returns the remainder of `input` on a match and `None` when
// `input` is too short or does not start with `prefix`.
fn strip_ci_prefix<'a>(input: &'a str, prefix: &str) -> Option<&'a str> {
    let head = input.as_bytes().get(..prefix.len())?;
    head.eq_ignore_ascii_case(prefix.as_bytes())
        .then(|| &input[prefix.len()..])
}
7647
7648// The left-boundary check for an email literal. The link is anchored at its
7649// true left edge: the preceding char must not be an ASCII alphanumeric (which
7650// would extend the local part leftward). For the bare form, a preceding `/` is
7651// also rejected (`/a@b.c` is not linked), while the extended
7652// `mailto:`/`xmpp:` form permits `/` before the scheme (so
7653// `…/mailto:beedrill@…` links).
7654fn email_left_boundary_ok(input: &str, index: usize, auto_mailto: bool) -> bool {
7655    if index == 0 {
7656        return true;
7657    }
7658    let Some(previous) = input[..index].chars().next_back() else {
7659        return true;
7660    };
7661    if previous.is_ascii_alphanumeric() {
7662        if auto_mailto
7663            && input[index..].starts_with('+')
7664            && prefix_ends_with_gfm_email(input, index)
7665        {
7666            return true;
7667        }
7668        return false;
7669    }
7670    if auto_mailto && previous == '/' {
7671        return false;
7672    }
7673    true
7674}
7675
7676fn prefix_ends_with_gfm_email(input: &str, end: usize) -> bool {
7677    let start = input[..end]
7678        .rfind(char::is_whitespace)
7679        .map_or(0, |offset| offset + 1);
7680    let candidate = &input[start..end];
7681    let Some(at) = candidate.rfind('@') else {
7682        return false;
7683    };
7684    email_local_is_valid(&candidate[..at], true) && is_gfm_email_domain(&candidate[at + 1..], false)
7685}
7686
7687// Validate the email local part. For the bare form, every char must be a GFM
7688// email atext byte (`[A-Za-z0-9.+_-]` plus the dot-separated structure). For
7689// the extended-protocol forms, the part after the scheme is validated.
7690fn email_local_is_valid(local: &str, auto_mailto: bool) -> bool {
7691    let body = if auto_mailto {
7692        local
7693    } else if let Some(rest) = strip_ci_prefix(local, "mailto:") {
7694        rest
7695    } else if let Some(rest) = strip_ci_prefix(local, "xmpp:") {
7696        rest
7697    } else {
7698        local
7699    };
7700    !body.is_empty() && body.bytes().all(is_gfm_email_local_byte)
7701}
7702
// Membership test for the GFM email local-part byte class (autolink-1). The
// class is `[A-Za-z0-9.+_-]`, the same set cmark rewinds over, and is narrower
// than RFC atext.
fn is_gfm_email_local_byte(byte: u8) -> bool {
    matches!(
        byte,
        b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'.' | b'+' | b'_' | b'-'
    )
}
7708
7709fn is_email_local_part(input: &str) -> bool {
7710    !input.is_empty()
7711        && input
7712            .split('.')
7713            .all(|segment| !segment.is_empty() && segment.bytes().all(is_email_atext))
7714}
7715
// True for a byte allowed in an email atext run: any ASCII letter or digit, or
// one of the punctuation bytes listed in `ATEXT_PUNCTUATION` below.
fn is_email_atext(byte: u8) -> bool {
    const ATEXT_PUNCTUATION: &[u8] = b"!#$%&'*+/=?^_`{|}~-";
    byte.is_ascii_alphanumeric() || ATEXT_PUNCTUATION.contains(&byte)
}
7740
// Forward scan over an email domain, ported from cmark-gfm's
// `postprocess_text`. Starting at byte offset `index`, the scan consumes:
//   * ASCII alphanumerics;
//   * `.`, but only when the next byte is an ASCII alphanumeric (each such dot
//     counts toward the "at least one dot" requirement);
//   * `-` and `_`;
//   * `/`, only for the `xmpp:` form (`is_xmpp`), where it starts a path.
// Any other byte, including a `.` that is not followed by an alphanumeric,
// stops the scan.
//
// Returns the offset just past the scanned span (before any trailing trim), or
// `None` when the span is empty, contains no counted dot, or does not end in
// an ASCII letter or a dot.
fn literal_email_domain_end(input: &str, index: usize, is_xmpp: bool) -> Option<usize> {
    let bytes = input.as_bytes();
    let mut counted_dots = 0usize;
    let mut end = index;

    while let Some(&byte) = bytes.get(end) {
        let accepted = match byte {
            b'.' => {
                let label_follows = bytes
                    .get(end + 1)
                    .is_some_and(u8::is_ascii_alphanumeric);
                if label_follows {
                    counted_dots += 1;
                }
                label_follows
            }
            b'-' | b'_' => true,
            b'/' => is_xmpp,
            other => other.is_ascii_alphanumeric(),
        };
        if !accepted {
            break;
        }
        end += 1;
    }

    if end <= index || counted_dots == 0 {
        return None;
    }
    let last = bytes[end - 1];
    (last.is_ascii_alphabetic() || last == b'.').then_some(end)
}
7777
// Structural validation of an email domain after the shared trailing trim.
// For the `xmpp:` form (`is_xmpp`) only the host, meaning the text before the
// first `/`, is checked; for every other form the whole input is the host, so
// a `/` makes it invalid. The host passes when it:
//   * contains at least one `.`;
//   * does not end in `-` or `_` (autolink-7);
//   * splits on `.` into labels that are all non-empty and built only from
//     ASCII alphanumerics, `-` and `_`.
// An empty input fails. No check is made here on whether the final label is
// numeric.
fn is_gfm_email_domain(input: &str, is_xmpp: bool) -> bool {
    let host = match input.split_once('/') {
        Some((before_path, _)) if is_xmpp => before_path,
        _ => input,
    };
    let is_label_byte = |byte: u8| byte.is_ascii_alphanumeric() || byte == b'-' || byte == b'_';

    host.contains('.')
        && !matches!(host.as_bytes().last(), Some(b'-' | b'_'))
        && host
            .split('.')
            .all(|label| !label.is_empty() && label.bytes().all(is_label_byte))
}
7809
// Validates a dot-separated domain. Every label must be 1 to 63 bytes long,
// begin and end with an ASCII alphanumeric, and contain nothing but ASCII
// alphanumerics and `-`. The domain must also have at least `min_labels`
// labels.
fn is_email_domain(input: &str, min_labels: usize) -> bool {
    let mut seen_labels = 0usize;
    let every_label_valid = input.split('.').all(|label| {
        seen_labels += 1;
        let bytes = label.as_bytes();
        (1..=63).contains(&bytes.len())
            && bytes[0].is_ascii_alphanumeric()
            && bytes[bytes.len() - 1].is_ascii_alphanumeric()
            && bytes
                .iter()
                .all(|byte| byte.is_ascii_alphanumeric() || *byte == b'-')
    });
    every_label_valid && seen_labels >= min_labels
}
7832
7833fn is_footnote_label(label: &str) -> bool {
7834    !label.is_empty()
7835        && reference_label_is_within_limit(label)
7836        && !label.chars().any(char::is_whitespace)
7837}
7838
7839fn find_footnote_definition_label_end(input: &str) -> Option<usize> {
7840    let close = find_footnote_reference_label_end(input, 2)?;
7841    if input.as_bytes().get(close + 1) == Some(&b':') {
7842        Some(close)
7843    } else {
7844        None
7845    }
7846}
7847
7848fn find_footnote_reference_label_end(input: &str, mut cursor: usize) -> Option<usize> {
7849    while cursor < input.len() {
7850        let (next, char) = next_char(input, cursor)?;
7851        if char == ']' && !is_escaped_at(input, cursor) {
7852            return Some(cursor);
7853        }
7854        cursor = next;
7855    }
7856    None
7857}
7858
7859fn find_inline_footnote_end(input: &str, mut cursor: usize) -> Option<usize> {
7860    let mut depth = 0usize;
7861    while cursor < input.len() {
7862        let (next, char) = next_char(input, cursor)?;
7863        if !is_escaped_at(input, cursor) {
7864            match char {
7865                '[' => depth += 1,
7866                ']' if depth == 0 => return Some(cursor),
7867                ']' => depth = depth.saturating_sub(1),
7868                _ => {}
7869            }
7870        }
7871        cursor = next;
7872    }
7873    None
7874}
markdown_syntax/parse.rs

markdown_syntax/
parse.rs