// markdown_syntax/parse.rs

//! Markdown source to AST. The entry points are the free [`parse`] function
//! (maximal default dialect) and the [`SyntaxOptions::parse`] /
//! [`SyntaxOptions::parse_strict`] methods. Parsing is tolerant: problems are
//! collected as [`Diagnostic`]s rather than aborting.
5
6use alloc::{borrow::Cow, string::String, vec, vec::Vec};
7
8use crate::{
9    ast::*,
10    diagnostic::{Diagnostic, DiagnosticCode, DiagnosticSeverity},
11    entities::named_character_reference,
12    options::{SyntaxConfigError, SyntaxOptions},
13    span::Span,
14    validate::is_directive_name,
15};
16
17/// The result of a tolerant parse: the document plus any diagnostics gathered
18/// along the way (empty on a clean parse).
19#[derive(Clone, Debug, Eq, PartialEq)]
20pub struct ParseOutput {
21    /// The parsed document tree.
22    pub document: Document,
23    /// Diagnostics collected during parsing.
24    pub diagnostics: Vec<Diagnostic>,
25}
26
27/// The error returned by [`SyntaxOptions::parse_strict`].
28#[derive(Clone, Debug, Eq, PartialEq)]
29pub enum ParseStrictError {
30    /// The options themselves were contradictory.
31    Config(SyntaxConfigError),
32    /// An error-severity diagnostic was promoted to a hard failure.
33    Diagnostic(Diagnostic),
34}
35
36#[derive(Clone, Debug, Eq, PartialEq)]
37struct ParsedLinkResource {
38    destination: String,
39    destination_kind: LinkDestinationKind,
40    title: Option<String>,
41    title_kind: Option<LinkTitleKind>,
42}
43
/// Upper bound on a reference label, counted in characters.
/// NOTE(review): the enforcing check is outside this part of the file; 999 is
/// also the link-label limit in the CommonMark spec — confirm that is the intent.
const REFERENCE_LABEL_MAX_CHARS: usize = 999;
/// Upper bound on a wikilink, counted in bytes.
/// NOTE(review): the enforcing check is outside this part of the file.
const WIKILINK_MAX_BYTES: usize = 999;
46
/// One physical source line together with its position in the original input.
#[derive(Debug, Clone, Copy)]
struct Line<'a> {
    /// Line content without its terminator.
    text: &'a str,
    /// The terminator that ended the line (`"\n"`, `"\r"` or `"\r\n"`), or
    /// `""` for a final line that has none.
    eol: &'a str,
    /// Byte offset of the first byte of `text` (base offset included).
    start: usize,
    /// Byte offset just past `text`, i.e. where the terminator begins.
    end: usize,
    /// Byte offset just past the terminator.
    end_with_eol: usize,
    /// Set when the line reached the current container as a *lazy
    /// continuation*: it carries no container marker yet still continues an
    /// open paragraph (CommonMark §5.2 laziness). Constructs that a lazy line
    /// must not start, such as a setext underline, check this flag.
    lazy: bool,
}
60
61#[derive(Clone, Copy, Debug)]
62struct ListMarkerInfo<'a> {
63    ordered: bool,
64    start: Option<u64>,
65    delimiter: ListDelimiter,
66    indent: usize,
67    marker_len: usize,
68    content_indent: usize,
69    content: &'a str,
70}
71
/// A description-list marker found on a line.
///
/// NOTE(review): producer and consumers are outside this part of the file;
/// the field notes follow the field names.
#[derive(Debug, Clone, Copy)]
struct DescriptionMarker<'a> {
    /// Offset at which the description content begins.
    content_offset: usize,
    /// The description content that follows the marker.
    content: &'a str,
}
77
/// Bookkeeping for the term part of a description-list entry.
///
/// NOTE(review): producer and consumers are outside this part of the file;
/// the field notes follow the field names.
#[derive(Debug, Clone)]
struct DescriptionTerm {
    /// Index of the line holding the description marker.
    marker_index: usize,
    /// Index at which the term ends.
    term_end: usize,
    /// Whether a blank line followed the term.
    blank_after_term: bool,
    /// Source text of the term.
    source: String,
    /// Offset of `source` within the input.
    source_offset: usize,
}
86
/// Category of an HTML block start.
///
/// NOTE(review): the HTML-block parser is outside this part of the file; the
/// variant notes follow the variant names — confirm against
/// `parse_html_block`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HtmlBlockKind {
    /// A raw-content tag.
    RawTag,
    /// A block-level tag.
    BlockTag,
    /// A block associated with a fixed terminator string.
    Until(&'static str),
    /// A block associated with a blank-line terminator.
    UntilBlank,
}
94
95/// Parse `input` under the maximal default dialect ([`SyntaxOptions::default`]).
96/// Infallible and tolerant; sugar for `SyntaxOptions::default().parse(input)`.
97pub fn parse(input: &str) -> ParseOutput {
98    SyntaxOptions::default().parse(input)
99}
100
101impl SyntaxOptions {
102    /// Parse `input` under these options. Infallible and tolerant: a config
103    /// conflict (reachable only by hand-building contradictory `Constructs`) is
104    /// surfaced as an error diagnostic rather than a hard error. Call
105    /// [`SyntaxOptions::validate`] first for fail-fast config checking.
106    pub fn parse(&self, input: &str) -> ParseOutput {
107        match parse_checked(input, self) {
108            Ok(output) => output,
109            Err(error) => ParseOutput {
110                document: Document::default(),
111                diagnostics: vec![Diagnostic::new(
112                    DiagnosticSeverity::Error,
113                    DiagnosticCode::StrictParse,
114                    Span::new(0, input.len()),
115                    error.message(),
116                )],
117            },
118        }
119    }
120
121    /// Parse `input`, promoting a config conflict or any error-severity
122    /// diagnostic to a hard [`ParseStrictError`].
123    pub fn parse_strict(&self, input: &str) -> Result<ParseOutput, ParseStrictError> {
124        let output = parse_checked(input, self).map_err(ParseStrictError::Config)?;
125        if let Some(diagnostic) = output
126            .diagnostics
127            .iter()
128            .find(|diagnostic| diagnostic.severity == DiagnosticSeverity::Error)
129        {
130            return Err(ParseStrictError::Diagnostic(diagnostic.clone()));
131        }
132        Ok(output)
133    }
134}
135
136fn parse_checked(input: &str, options: &SyntaxOptions) -> Result<ParseOutput, SyntaxConfigError> {
137    options.validate()?;
138    let mut diagnostics = Vec::new();
139    let definitions = collect_definitions(input, options);
140    let children = parse_blocks(input, 0, true, options, &definitions, &mut diagnostics);
141
142    Ok(ParseOutput {
143        document: Document {
144            meta: NodeMeta::new(Some(Span::new(0, input.len()))),
145            children,
146        },
147        diagnostics,
148    })
149}
150
151fn parse_blocks(
152    input: &str,
153    base_offset: usize,
154    allow_frontmatter: bool,
155    options: &SyntaxOptions,
156    definitions: &[String],
157    diagnostics: &mut Vec<Diagnostic>,
158) -> Vec<Block> {
159    let lines = collect_lines(input, base_offset);
160    parse_blocks_from_lines(&lines, allow_frontmatter, options, definitions, diagnostics)
161}
162
163fn parse_blocks_from_lines(
164    lines: &[Line<'_>],
165    allow_frontmatter: bool,
166    options: &SyntaxOptions,
167    definitions: &[String],
168    diagnostics: &mut Vec<Diagnostic>,
169) -> Vec<Block> {
170    let mut blocks = Vec::new();
171    let mut index = 0;
172
173    while index < lines.len() {
174        let line = lines[index];
175        if line.text.trim().is_empty() {
176            index += 1;
177            continue;
178        }
179        let after_definition_unbroken = index > 0
180            && !lines[index - 1].text.trim().is_empty()
181            && matches!(blocks.last(), Some(Block::Definition(_)));
182
183        if allow_frontmatter && index == 0 {
184            if let Some((block, next)) = parse_frontmatter(lines, index, options) {
185                blocks.push(block);
186                index = next;
187                continue;
188            }
189        }
190
191        if let Some((block, next)) =
192            parse_container_directive(lines, index, options, definitions, diagnostics)
193        {
194            blocks.push(block);
195            index = next;
196            continue;
197        }
198
199        if let Some((block, next)) = parse_math_block(lines, index, options) {
200            blocks.push(block);
201            index = next;
202            continue;
203        }
204
205        if let Some((block, next)) = parse_fenced_code(lines, index, options) {
206            blocks.push(block);
207            index = next;
208            continue;
209        }
210
211        if let Some((block, next)) =
212            parse_block_quote(lines, index, options, definitions, diagnostics)
213        {
214            blocks.push(block);
215            index = next;
216            continue;
217        }
218
219        if let Some(block) = parse_atx_heading(line, options, definitions) {
220            blocks.push(block);
221            index += 1;
222            continue;
223        }
224
225        if let Some(block) = parse_thematic_break(line) {
226            blocks.push(block);
227            index += 1;
228            continue;
229        }
230
231        if let Some((block, next)) = parse_list(lines, index, options, definitions, diagnostics) {
232            blocks.push(block);
233            index = next;
234            continue;
235        }
236
237        if let Some((block, next)) =
238            parse_footnote_definition(lines, index, options, definitions, diagnostics)
239        {
240            blocks.push(block);
241            index = next;
242            continue;
243        }
244
245        if let Some((block, next)) =
246            parse_definition(lines, index, options, after_definition_unbroken)
247        {
248            blocks.push(block);
249            index = next;
250            continue;
251        }
252
253        if let Some(block) = parse_leaf_directive(line, options, definitions, diagnostics) {
254            blocks.push(block);
255            index += 1;
256            continue;
257        }
258
259        if let Some((block, next)) = parse_html_block(lines, index, options) {
260            blocks.push(block);
261            index = next;
262            continue;
263        }
264
265        if let Some((block, next)) = parse_mdx_flow(lines, index, options, diagnostics) {
266            blocks.push(block);
267            index = next;
268            continue;
269        }
270
271        if !after_definition_unbroken {
272            if let Some((block, next)) = parse_indented_code(lines, index, options) {
273                blocks.push(block);
274                index = next;
275                continue;
276            }
277        }
278
279        if let Some((block, next)) = parse_table(lines, index, options, definitions, diagnostics) {
280            blocks.push(block);
281            index = next;
282            continue;
283        }
284
285        if let Some((block, next)) = parse_setext_heading(lines, index, options, definitions) {
286            blocks.push(block);
287            index = next;
288            continue;
289        }
290
291        if let Some((block, next)) =
292            parse_description_list(lines, index, options, definitions, diagnostics)
293        {
294            blocks.push(block);
295            index = next;
296            continue;
297        }
298
299        let (block, next) = parse_paragraph(lines, index, options, definitions, diagnostics);
300        blocks.push(block);
301        index = next;
302    }
303
304    blocks
305}
306
307fn collect_lines(input: &str, base_offset: usize) -> Vec<Line<'_>> {
308    let bytes = input.as_bytes();
309    let mut lines = Vec::new();
310    let mut start = 0;
311    let mut index = 0;
312
313    while index < bytes.len() {
314        match bytes[index] {
315            b'\n' => {
316                let end = index;
317                lines.push(Line {
318                    text: &input[start..end],
319                    eol: &input[index..index + 1],
320                    start: base_offset + start,
321                    end: base_offset + end,
322                    end_with_eol: base_offset + index + 1,
323                    lazy: false,
324                });
325                index += 1;
326                start = index;
327            }
328            b'\r' => {
329                let end = index;
330                let eol_end = if index + 1 < bytes.len() && bytes[index + 1] == b'\n' {
331                    index + 2
332                } else {
333                    index + 1
334                };
335                lines.push(Line {
336                    text: &input[start..end],
337                    eol: &input[index..eol_end],
338                    start: base_offset + start,
339                    end: base_offset + end,
340                    end_with_eol: base_offset + eol_end,
341                    lazy: false,
342                });
343                index = eol_end;
344                start = index;
345            }
346            _ => index += 1,
347        }
348    }
349
350    if start < bytes.len() || input.is_empty() {
351        lines.push(Line {
352            text: &input[start..],
353            eol: "",
354            start: base_offset + start,
355            end: base_offset + bytes.len(),
356            end_with_eol: base_offset + bytes.len(),
357            lazy: false,
358        });
359    }
360
361    lines
362}
363
364fn collect_definitions(input: &str, options: &SyntaxOptions) -> Vec<String> {
365    let mut diagnostics = Vec::new();
366    let blocks = parse_blocks(input, 0, true, options, &[], &mut diagnostics);
367    let mut definitions = Vec::new();
368    collect_definition_refs_from_blocks(&blocks, &mut definitions);
369    definitions
370}
371
372fn collect_definition_refs_from_blocks(blocks: &[Block], definitions: &mut Vec<String>) {
373    for block in blocks {
374        match block {
375            Block::Definition(definition) => {
376                if definitions
377                    .iter()
378                    .all(|identifier| identifier != &definition.identifier)
379                {
380                    definitions.push(definition.identifier.clone());
381                }
382            }
383            Block::BlockQuote(node) => {
384                collect_definition_refs_from_blocks(&node.children, definitions);
385            }
386            Block::Alert(node) => {
387                collect_definition_refs_from_blocks(&node.children, definitions);
388            }
389            Block::List(node) => {
390                for item in &node.children {
391                    collect_definition_refs_from_blocks(&item.children, definitions);
392                }
393            }
394            Block::DescriptionList(node) => {
395                for item in &node.children {
396                    for details in &item.details {
397                        collect_definition_refs_from_blocks(&details.children, definitions);
398                    }
399                }
400            }
401            Block::FootnoteDefinition(node) => {
402                collect_definition_refs_from_blocks(&node.children, definitions);
403            }
404            Block::ContainerDirective(node) => {
405                collect_definition_refs_from_blocks(&node.children, definitions);
406            }
407            _ => {}
408        }
409    }
410}
411
412fn parse_frontmatter(
413    lines: &[Line<'_>],
414    index: usize,
415    options: &SyntaxOptions,
416) -> Option<(Block, usize)> {
417    if !options.constructs.frontmatter {
418        return None;
419    }
420    let kind = frontmatter_fence_kind(lines[index].text)?;
421
422    let mut value = String::new();
423    let mut cursor = index + 1;
424    while cursor < lines.len() {
425        if frontmatter_fence_kind(lines[cursor].text) == Some(kind) {
426            let span = Span::new(lines[index].start, lines[cursor].end_with_eol);
427            return Some((
428                Block::Frontmatter(Frontmatter {
429                    meta: NodeMeta::new(Some(span)),
430                    kind,
431                    value,
432                }),
433                cursor + 1,
434            ));
435        }
436        push_line(&mut value, lines[cursor].text);
437        cursor += 1;
438    }
439
440    None
441}
442
443fn frontmatter_fence_kind(line: &str) -> Option<FrontmatterKind> {
444    match line.trim_end_matches([' ', '\t']) {
445        "---" => Some(FrontmatterKind::Yaml),
446        "+++" => Some(FrontmatterKind::Toml),
447        _ => None,
448    }
449}
450
451fn parse_container_directive(
452    lines: &[Line<'_>],
453    index: usize,
454    options: &SyntaxOptions,
455    definitions: &[String],
456    diagnostics: &mut Vec<Diagnostic>,
457) -> Option<(Block, usize)> {
458    if !options.constructs.directive_container {
459        return None;
460    }
461    let trimmed = trim_up_to_three_spaces(lines[index].text)?;
462    let Some((fence_len, opener_rest)) = directive_container_opener_prefix(trimmed) else {
463        return None;
464    };
465    let opener_base = lines[index].start + (lines[index].text.len() - trimmed.len()) + fence_len;
466
467    let Some((name, label_source, attributes, _consumed)) = parse_directive_opener(opener_rest)
468    else {
469        diagnostics.push(Diagnostic::new(
470            DiagnosticSeverity::Error,
471            DiagnosticCode::InvalidDirectiveName,
472            Span::new(lines[index].start, lines[index].end),
473            "container directive must have a valid name",
474        ));
475        return None;
476    };
477    let label_base = opener_base + name.len() + 1;
478
479    let mut content = String::new();
480    let mut cursor = index + 1;
481    let mut nested_fences = Vec::new();
482    while cursor < lines.len() {
483        let line = lines[cursor].text;
484        let trimmed = trim_up_to_three_spaces(line);
485        if let Some(trimmed) = trimmed {
486            if let Some(nested_len) = nested_fences.last().copied() {
487                if directive_container_closing_fence(trimmed, nested_len).is_some() {
488                    nested_fences.pop();
489                    push_line(&mut content, line);
490                    cursor += 1;
491                    continue;
492                }
493            } else if directive_container_closing_fence(trimmed, fence_len).is_some() {
494                let label = label_source
495                    .map(|source| {
496                        parse_inlines(source, label_base, options, definitions, diagnostics)
497                    })
498                    .unwrap_or_default();
499                let children = parse_blocks(
500                    &content,
501                    lines[index + 1].start,
502                    false,
503                    options,
504                    definitions,
505                    diagnostics,
506                );
507                return Some((
508                    Block::ContainerDirective(ContainerDirective {
509                        meta: NodeMeta::new(Some(Span::new(
510                            lines[index].start,
511                            lines[cursor].end_with_eol,
512                        ))),
513                        name,
514                        label,
515                        attributes,
516                        children,
517                    }),
518                    cursor + 1,
519                ));
520            }
521
522            if let Some((nested_len, nested_rest)) = directive_container_opener_prefix(trimmed) {
523                if parse_directive_opener(nested_rest).is_some() {
524                    nested_fences.push(nested_len);
525                }
526            }
527        }
528
529        push_line(&mut content, line);
530        cursor += 1;
531    }
532
533    diagnostics.push(Diagnostic::new(
534        DiagnosticSeverity::Error,
535        DiagnosticCode::UnclosedDirectiveContainer,
536        Span::new(lines[index].start, lines[index].end),
537        "container directive is missing a closing fence",
538    ));
539    Some((
540        Block::ContainerDirective(ContainerDirective {
541            meta: NodeMeta::new(Some(Span::new(
542                lines[index].start,
543                lines.last()?.end_with_eol,
544            ))),
545            name,
546            label: label_source
547                .map(|source| parse_inlines(source, label_base, options, definitions, diagnostics))
548                .unwrap_or_default(),
549            attributes,
550            children: parse_blocks(
551                &content,
552                lines
553                    .get(index + 1)
554                    .map(|line| line.start)
555                    .unwrap_or(lines[index].end),
556                false,
557                options,
558                definitions,
559                diagnostics,
560            ),
561        }),
562        lines.len(),
563    ))
564}
565
/// Split a leading run of three or more `:` off `input`.
///
/// Returns the run length and the text after it, or `None` when `input`
/// starts with fewer than three colons.
fn directive_container_opener_prefix(input: &str) -> Option<(usize, &str)> {
    let rest = input.trim_start_matches(':');
    // `:` is a single byte, so the byte difference equals the colon count.
    let fence_len = input.len() - rest.len();
    (fence_len >= 3).then_some((fence_len, rest))
}
578
/// Recognise a closing colon fence: a leading run of at least `min_len`
/// colons followed by nothing but whitespace.
///
/// Returns the length of the colon run, or `None` when the line does not
/// close a fence of that size.
fn directive_container_closing_fence(input: &str, min_len: usize) -> Option<usize> {
    let tail = input.trim_start_matches(':');
    let fence_len = input.len() - tail.len();
    if fence_len < min_len || !tail.trim().is_empty() {
        return None;
    }
    Some(fence_len)
}
591
592fn parse_math_block(
593    lines: &[Line<'_>],
594    index: usize,
595    options: &SyntaxOptions,
596) -> Option<(Block, usize)> {
597    if !options.constructs.math_block {
598        return None;
599    }
600    // A math-flow opener is the fenced-code analogue: a `>=2` dollar run after
601    // 0–3 columns of indent, optionally followed by an "info"/meta string that
602    // must NOT contain another `$` (`$$ $$` is inline math, not a flow open).
603    // The opening indent is stripped (up to its own width) from each content
604    // line, exactly like a fenced code block.
605    let opener = trim_up_to_three_spaces(lines[index].text)?;
606    let fence_length = math_block_fence_length(opener)?;
607    let opening_indent = leading_indent_columns(lines[index].text);
608
609    let mut value = String::new();
610    let mut content_lines = 0usize;
611    let mut cursor = index + 1;
612    while cursor < lines.len() {
613        if let Some(close_line) = trim_up_to_three_spaces(lines[cursor].text) {
614            if math_block_fence_closes(close_line, fence_length) {
615                return Some((
616                    Block::MathBlock(MathBlock {
617                        meta: NodeMeta::new(Some(Span::new(
618                            lines[index].start,
619                            lines[cursor].end_with_eol,
620                        ))),
621                        value,
622                    }),
623                    cursor + 1,
624                ));
625            }
626        }
627        if content_lines > 0 {
628            // The previous content line's `eol` usually separates lines. This
629            // fallback only covers synthetic child input that lacks an EOL despite
630            // yielding another line.
631            ensure_line_separator(&mut value);
632        }
633        let stripped = strip_leading_indent_columns(lines[cursor].text, opening_indent);
634        value.push_str(&stripped);
635        value.push_str(lines[cursor].eol);
636        content_lines += 1;
637        cursor += 1;
638    }
639
640    // EOF closes the block (an unclosed opener runs to end of document); an
641    // immediate EOF after the opener yields an empty math block.
642    Some((
643        Block::MathBlock(MathBlock {
644            meta: NodeMeta::new(Some(Span::new(
645                lines[index].start,
646                lines.last()?.end_with_eol,
647            ))),
648            value,
649        }),
650        lines.len(),
651    ))
652}
653
/// Validate a math-block opener (indent already stripped) and return the
/// length of its leading `$` run.
///
/// A valid opener has at least two leading dollars and no `$` anywhere in the
/// text that follows the run.
fn math_block_fence_length(input: &str) -> Option<usize> {
    let meta = input.trim_start_matches('$');
    // `$` is a single byte, so the byte difference equals the dollar count.
    let length = input.len() - meta.len();
    (length >= 2 && !meta.contains('$')).then_some(length)
}
667
/// Whether `input` (indent already stripped) closes a math block opened with
/// `length` dollars: a leading run of at least `length` dollars followed by
/// nothing but whitespace.
fn math_block_fence_closes(input: &str, length: usize) -> bool {
    let tail = input.trim_start_matches('$');
    let run = input.len() - tail.len();
    if run < length {
        return false;
    }
    tail.trim().is_empty()
}
678
679fn parse_fenced_code(
680    lines: &[Line<'_>],
681    index: usize,
682    options: &SyntaxOptions,
683) -> Option<(Block, usize)> {
684    let line = fence_line(lines[index].text, options)?;
685    let (marker, length) = fence_start(line)?;
686    // CommonMark: up to N columns of indentation (N = the opening fence's
687    // indent, 0–3) are removed from each content line.
688    let opening_indent = leading_indent_columns(lines[index].text);
689    let info = line[length..].trim();
690    if marker == FenceMarker::Backtick && info.contains('`') {
691        return None;
692    }
693    let info = if info.is_empty() {
694        None
695    } else {
696        Some(unescape_string(info))
697    };
698
699    let mut value = String::new();
700    // Join content lines with `\n` while preserving a leading blank line: a
701    // fenced block can open with a blank content line, and `push_line`'s
702    // empty-output proxy cannot tell zero lines from one empty line, so it would
703    // drop that leading blank. Track the count explicitly (as parse_math_block).
704    let mut content_lines = 0usize;
705    let mut cursor = index + 1;
706    while cursor < lines.len() {
707        if let Some(close_line) = fence_line(lines[cursor].text, options) {
708            if fence_close(close_line, marker, length) {
709                return Some((
710                    Block::CodeBlock(CodeBlock {
711                        meta: NodeMeta::new(Some(Span::new(
712                            lines[index].start,
713                            lines[cursor].end_with_eol,
714                        ))),
715                        kind: CodeBlockKind::Fenced { marker, length },
716                        info,
717                        value,
718                    }),
719                    cursor + 1,
720                ));
721            }
722        }
723        if content_lines > 0 {
724            // The previous content line's `eol` usually separates lines. This
725            // fallback only covers synthetic child input that lacks an EOL despite
726            // yielding another line.
727            ensure_line_separator(&mut value);
728        }
729        let stripped = strip_leading_indent_columns(lines[cursor].text, opening_indent);
730        value.push_str(&stripped);
731        value.push_str(lines[cursor].eol);
732        content_lines += 1;
733        cursor += 1;
734    }
735    Some((
736        Block::CodeBlock(CodeBlock {
737            meta: NodeMeta::new(Some(Span::new(
738                lines[index].start,
739                lines.last()?.end_with_eol,
740            ))),
741            kind: CodeBlockKind::Fenced { marker, length },
742            info,
743            value,
744        }),
745        lines.len(),
746    ))
747}
748
749fn fence_line<'a>(line: &'a str, options: &SyntaxOptions) -> Option<&'a str> {
750    if options.constructs.indented_code {
751        trim_up_to_three_spaces(line)
752    } else {
753        Some(trim_ascii_start(line))
754    }
755}
756
757fn container_closed_after_unclosed_fence(
758    lines: &[Line<'_>],
759    cursor: usize,
760    last_content_index: usize,
761    content: &str,
762    options: &SyntaxOptions,
763) -> bool {
764    !lines[last_content_index].eol.is_empty()
765        && (cursor >= lines.len() || lines[cursor].text.trim().is_empty())
766        && content_has_unclosed_fenced_code(content, options)
767}
768
769fn content_has_unclosed_fenced_code(content: &str, options: &SyntaxOptions) -> bool {
770    let lines = collect_lines(content, 0);
771    let mut open_fence = None;
772    for line in lines {
773        let Some(trimmed) = fence_line(line.text, options) else {
774            continue;
775        };
776        if let Some((marker, length, has_nonblank_content)) = open_fence {
777            if fence_close(trimmed, marker, length) {
778                open_fence = None;
779            } else {
780                open_fence = Some((
781                    marker,
782                    length,
783                    has_nonblank_content || !trimmed.trim().is_empty(),
784                ));
785            }
786            continue;
787        }
788        let Some((marker, length)) = fence_start(trimmed) else {
789            continue;
790        };
791        let info = trimmed[length..].trim();
792        if marker != FenceMarker::Backtick || !info.contains('`') {
793            open_fence = Some((marker, length, false));
794        }
795    }
796    open_fence.is_some_and(|(_, _, has_nonblank_content)| !has_nonblank_content)
797}
798
799/// Recursively determines whether the innermost block reachable through this
800/// (already marker-stripped) block-quote content line is an OPEN paragraph —
801/// the only block kind that a following lazy continuation line may extend.
802///
803/// Nested quote markers are stripped one level at a time so that, e.g.,
804/// `> > a` reports that the deepest content `a` is an open paragraph (this is
805/// what lets a lazy line continue a paragraph buried inside several quotes).
806/// Indented code, blank lines, HTML blocks, and every other block start are
807/// reported as NOT-an-open-paragraph.
808fn block_quote_content_paragraph_open(content: &str, options: &SyntaxOptions) -> bool {
809    let Some(trimmed) = trim_up_to_three_spaces(content) else {
810        // >= 4 columns of indentation: indented code, never a paragraph.
811        return false;
812    };
813    if trimmed.is_empty() {
814        return false;
815    }
816    if let Some(rest) = trimmed.strip_prefix('>') {
817        let rest = rest.strip_prefix(' ').unwrap_or(rest);
818        return block_quote_content_paragraph_open(rest, options);
819    }
820    if let Some(marker) = list_marker_info(trimmed) {
821        let first_content = list_marker_first_content(trimmed, marker);
822        return block_quote_content_paragraph_open(&first_content, options);
823    }
824    !lazy_line_starts_block(trimmed, options)
825}
826
827/// Whether a line starts a block for the purpose of LAZY-continuation
828/// suppression. Identical to [`likely_block_start`] except that *every* HTML
829/// block start — including the type-7 "complete tag" form that cannot interrupt
830/// a paragraph with a marker present — blocks lazy continuation. A bare `<a>`
831/// after `> a` must close the quote, not be absorbed as paragraph text.
832fn lazy_line_starts_block(input: &str, options: &SyntaxOptions) -> bool {
833    likely_block_start(input, options)
834        || (options.constructs.html_block && line_starts_html_block(input))
835        // A lazy line that almost opens a fenced code block — any fence-char
836        // run after up to three spaces of indent — ends the paragraph instead
837        // of continuing it (GH-19): `> x\n``\n` closes the quote rather than
838        // joining `` ` `` onto the paragraph.
839        || trim_up_to_three_spaces(input).is_some_and(|t| t.starts_with('`') || t.starts_with('~'))
840}
841
/// Parses a block quote that starts at `lines[index]`.
///
/// Returns `None` unless the line, after up to three spaces of indent, begins
/// with `>`. Otherwise the quote's content lines are collected — marker-stripped
/// lines plus lazy paragraph continuations — and re-parsed as child blocks.
///
/// The returned block is whatever `parse_alert_from_block_quote` produces when
/// it accepts the content, else a `Block::BlockQuote`. The returned index is the
/// first line that was not consumed by the quote.
fn parse_block_quote(
    lines: &[Line<'_>],
    index: usize,
    options: &SyntaxOptions,
    definitions: &[String],
    diagnostics: &mut Vec<Diagnostic>,
) -> Option<(Block, usize)> {
    if !trim_up_to_three_spaces(lines[index].text)?.starts_with('>') {
        return None;
    }

    let mut content = String::new();
    // Lazy provenance per collected content line, parallel to the `\n`-joined
    // `content`. Re-split (`collect_lines`) lines map 1:1 to these flags, so the
    // child parser can suppress lazy-only constructs (e.g. setext underlines).
    let mut lazy_flags: Vec<bool> = Vec::new();
    let mut cursor = index;
    // True while the most recently collected line leaves a paragraph open, which
    // is the precondition for accepting an unmarked (lazy) line.
    let mut paragraph_open = false;
    // True from a marked line that starts a table until a line that is not a
    // marked table body row.
    let mut in_table = false;
    let mut last_content_line: Option<String> = None;
    // Source offset of the first collected content line; used as the base
    // offset for the child parse.
    let mut content_base_offset = None;
    while cursor < lines.len() {
        let raw = lines[cursor].text;
        let trimmed_opt = trim_up_to_three_spaces(raw);
        let marked = trimmed_opt.is_some_and(|trimmed| trimmed.starts_with('>'));
        // Backing storage for the tab-expansion case below; declared here so a
        // borrow of it can outlive the `if marked` branch.
        let quote_rest_owned: String;
        // A line that is empty after indent trimming ends the quote.
        if let Some(trimmed) = trimmed_opt {
            if trimmed.is_empty() {
                break;
            }
        }
        let (line, line_start) = if marked {
            let trimmed = trimmed_opt.expect("marked implies a trimmed line");
            let trimmed_start = lines[cursor].start + (raw.len() - trimmed.len());
            // Skip the `>` itself, then at most one following space.
            let mut rest_start = 1;
            let mut rest = &trimmed[rest_start..];
            if rest.starts_with(' ') {
                rest_start += 1;
                rest = &rest[1..];
            } else if rest.starts_with('\t') {
                // NOTE(review): presumably consumes a single column of the tab
                // as the optional space after `>`; `rest_start` is left
                // unchanged in this branch — confirm against
                // `strip_leading_indent_columns_from`.
                let marker_end_column = leading_indent_columns(raw) + 1;
                match strip_leading_indent_columns_from(rest, 1, marker_end_column) {
                    Cow::Borrowed(stripped) => rest = stripped,
                    Cow::Owned(stripped) => {
                        quote_rest_owned = stripped;
                        rest = &quote_rest_owned;
                    }
                }
            }
            (rest, trimmed_start + rest_start)
        } else if in_table {
            // While a GFM table is open, an unmarked line is never accepted as
            // a lazy continuation: it ends the quote.
            break;
        } else if paragraph_open && !lazy_line_starts_block(raw, options) {
            // Lazy paragraph continuation: a marker-less line that continues an
            // open paragraph (possibly nested). The RAW line is used verbatim —
            // its indentation (even >= 4 columns) is paragraph text, not code.
            (raw, lines[cursor].start)
        } else {
            break;
        };

        // An unmarked (lazy) line that, paired with the previous content line,
        // could start a table gets a backslash inserted before its first `-`
        // — presumably so the child parse keeps it as paragraph text rather
        // than reading it as a table delimiter row.
        let mut escaped_lazy = String::new();
        let line = if !marked
            && last_content_line.as_deref().is_some_and(|previous| {
                table_can_start_source(
                    previous,
                    line,
                    options.constructs.indented_code,
                    options.constructs.spoiler,
                )
            }) {
            escaped_lazy.push_str(line);
            if let Some(offset) = escaped_lazy.find('-') {
                escaped_lazy.insert(offset, '\\');
            }
            &escaped_lazy
        } else {
            line
        };

        // Re-evaluated on the (possibly escaped) line that will be stored.
        let starts_table = last_content_line.as_deref().is_some_and(|previous| {
            table_can_start_source(
                previous,
                line,
                options.constructs.indented_code,
                options.constructs.spoiler,
            )
        });
        if marked && starts_table {
            paragraph_open = false;
            in_table = true;
        } else if marked && in_table && block_quote_table_body_row(line, options) {
            paragraph_open = false;
        } else {
            in_table = false;
            // Track the innermost open paragraph across nested quote markers so a
            // following lazy line can reach a paragraph buried in nested quotes.
            paragraph_open = block_quote_content_paragraph_open(line, options);
        }
        last_content_line = Some(line.into());
        if content_base_offset.is_none() {
            content_base_offset = Some(line_start);
        }
        push_line(&mut content, line);
        lazy_flags.push(!marked);
        cursor += 1;
    }

    // The first line is marked and non-empty (checked on entry), so the loop
    // consumed at least one line and `cursor - 1 >= index`.
    let span = Span::new(lines[index].start, lines[cursor - 1].end_with_eol);
    let child_base_offset = content_base_offset.unwrap_or(lines[index].start);
    // Carry the last consumed line's own line ending over to the content.
    if !lines[cursor - 1].eol.is_empty() && !ends_with_line_ending(&content) {
        content.push_str(lines[cursor - 1].eol);
    }
    if container_closed_after_unclosed_fence(lines, cursor, cursor - 1, &content, options) {
        content.push('\n');
    }
    // Alerts take precedence over a plain block quote.
    if let Some(alert) = parse_alert_from_block_quote(
        &content,
        child_base_offset,
        span,
        options,
        definitions,
        diagnostics,
    ) {
        return Some((alert, cursor));
    }

    // Re-split the collected content and restore each line's lazy flag before
    // handing it to the child block parser.
    let mut child_lines = collect_lines(&content, child_base_offset);
    for (child, &lazy) in child_lines.iter_mut().zip(lazy_flags.iter()) {
        child.lazy = lazy;
    }
    let children = parse_blocks_from_lines(&child_lines, false, options, definitions, diagnostics);
    Some((
        Block::BlockQuote(BlockQuote {
            meta: NodeMeta::new(Some(span)),
            children,
        }),
        cursor,
    ))
}
984
985fn parse_alert_from_block_quote(
986    content: &str,
987    base_offset: usize,
988    span: Span,
989    options: &SyntaxOptions,
990    definitions: &[String],
991    diagnostics: &mut Vec<Diagnostic>,
992) -> Option<Block> {
993    if !options.constructs.gfm_alert {
994        return None;
995    }
996    let (first_line, rest) = content.split_once('\n').unwrap_or((content, ""));
997    let (kind, title) = parse_alert_marker(first_line)?;
998    let rest_base_offset = base_offset + first_line.len() + usize::from(!rest.is_empty());
999    let children = if rest.is_empty() {
1000        Vec::new()
1001    } else {
1002        parse_blocks(
1003            rest,
1004            rest_base_offset,
1005            false,
1006            options,
1007            definitions,
1008            diagnostics,
1009        )
1010    };
1011    Some(Block::Alert(Alert {
1012        meta: NodeMeta::new(Some(span)),
1013        kind,
1014        title,
1015        children,
1016    }))
1017}
1018
1019fn parse_alert_marker(line: &str) -> Option<(AlertKind, Option<String>)> {
1020    let close = line.find(']')?;
1021    let marker = line.get(0..close + 1)?;
1022    if !marker.starts_with("[!") {
1023        return None;
1024    }
1025    let kind = match &marker[2..close].to_ascii_lowercase()[..] {
1026        "note" => AlertKind::Note,
1027        "tip" => AlertKind::Tip,
1028        "important" => AlertKind::Important,
1029        "warning" => AlertKind::Warning,
1030        "caution" => AlertKind::Caution,
1031        _ => return None,
1032    };
1033    let title = line[close + 1..].trim();
1034    Some((
1035        kind,
1036        if title.is_empty() {
1037            None
1038        } else {
1039            Some(title.into())
1040        },
1041    ))
1042}
1043
1044fn block_quote_table_body_row(line: &str, options: &SyntaxOptions) -> bool {
1045    table_indent_line(line, options.constructs.indented_code).is_some_and(|row| {
1046        !row.trim().is_empty() && contains_unescaped_pipe(row, options.constructs.spoiler)
1047    })
1048}
1049
1050fn parse_list(
1051    lines: &[Line<'_>],
1052    index: usize,
1053    options: &SyntaxOptions,
1054    definitions: &[String],
1055    diagnostics: &mut Vec<Diagnostic>,
1056) -> Option<(Block, usize)> {
1057    let first_marker = list_marker_info(lines[index].text)?;
1058    let mut items = Vec::new();
1059    let mut cursor = index;
1060    let mut tight = true;
1061
1062    while cursor < lines.len() {
1063        // A thematic break (`* * *`, `---`, …) outranks a list marker at the same
1064        // position: it ends the list rather than opening a nested item. Test it
1065        // before accepting the line as a marker (precedence belongs at the call
1066        // site, not inside `list_marker_info`).
1067        if parse_thematic_break(lines[cursor]).is_some() {
1068            break;
1069        }
1070        let Some(marker) = list_marker_info(lines[cursor].text) else {
1071            break;
1072        };
1073        if !same_list_marker(first_marker, marker) {
1074            break;
1075        }
1076
1077        let item_start = cursor;
1078        let mut item_end = cursor;
1079        let mut item_tight = true;
1080        // Byte offsets within `content` at which an item-internal blank line
1081        // sits. After the item's children are parsed, a blank loosens the item
1082        // only when it falls in the GAP between two consecutive top-level
1083        // children (a direct separator); a blank absorbed inside a nested
1084        // container's span does not (per-list tightness).
1085        let mut item_blank_offsets: Vec<usize> = Vec::new();
1086        let mut content = String::new();
1087        // Lazy provenance per collected content line (parallel to the `\n`-joined
1088        // `content`, mapped 1:1 by the re-split `collect_lines`). A line is lazy
1089        // when it reached the item only as a paragraph continuation while
1090        // dedented below the item's content start: it is paragraph text and must
1091        // not begin a new block (e.g. `- d\n    - e` keeps `- e` as the lazy tail
1092        // of `d`'s paragraph, not a sublist — CommonMark "too few spaces").
1093        let mut lazy_flags: Vec<bool> = Vec::new();
1094        let mut open_fence = None;
1095        let first_content = list_marker_first_content(lines[cursor].text, marker);
1096        let mut last_content_line: Option<String> = Some(first_content.as_ref().into());
1097        let mut paragraph_open = list_item_paragraph_stays_open(None, &first_content, options);
1098        // CommonMark §5.2: a list item can begin with at most one blank line.
1099        // When the marker has no content the item starts blank, and the first
1100        // following blank line ends it — later indented content cannot join
1101        // (`-\n\n  foo` → empty item + separate paragraph).
1102        let mut item_started_blank = first_content.trim().is_empty();
1103        push_line(&mut content, &first_content);
1104        lazy_flags.push(false);
1105        update_list_item_fence(&first_content, &mut open_fence);
1106        cursor += 1;
1107
1108        while cursor < lines.len() {
1109            if lines[cursor].text.trim().is_empty() {
1110                // Blank/whitespace lines inside an open fenced code block are
1111                // verbatim code content, not item-ending blanks: keep them.
1112                if open_fence.is_some() {
1113                    let stripped = strip_list_continuation(
1114                        lines[cursor].text,
1115                        marker.content_indent,
1116                        first_marker.indent,
1117                    );
1118                    push_line(&mut content, &stripped);
1119                    lazy_flags.push(false);
1120                    update_list_item_fence(&stripped, &mut open_fence);
1121                    item_end = cursor;
1122                    cursor += 1;
1123                    continue;
1124                }
1125                let next = next_nonblank_line(lines, cursor + 1);
1126                if item_started_blank
1127                    || next >= lines.len()
1128                    || sibling_list_marker_at_line(
1129                        lines[next].text,
1130                        first_marker,
1131                        marker.content_indent,
1132                    )
1133                    || leading_indent_columns(lines[next].text) < marker.content_indent
1134                {
1135                    if next < lines.len()
1136                        && sibling_list_marker_at_line(
1137                            lines[next].text,
1138                            first_marker,
1139                            marker.content_indent,
1140                        )
1141                    {
1142                        item_tight = false;
1143                    }
1144                    cursor = next;
1145                    break;
1146                }
1147                // A blank between item content is recorded; whether it actually
1148                // loosens THIS list is decided structurally after the item's
1149                // children are parsed (a blank buried in a nested sublist must
1150                // not loosen the outer list — CommonMark requires the item to
1151                // *directly* contain the blank-separated blocks). Track the blank
1152                // line's offset within the collected content so the structural
1153                // check can tell a direct-child separator from a nested one.
1154                item_blank_offsets.push(content.len() + usize::from(!content.is_empty()));
1155                paragraph_open = false;
1156                push_line(&mut content, "");
1157                lazy_flags.push(false);
1158                item_end = cursor;
1159                cursor += 1;
1160                continue;
1161            }
1162
1163            item_started_blank = false;
1164
1165            if sibling_list_marker_at_line(lines[cursor].text, first_marker, marker.content_indent)
1166            {
1167                break;
1168            }
1169
1170            // A list marker of a different type/delimiter is a block boundary
1171            // (CommonMark §5.3: changing the marker starts a new list). It is not
1172            // a same-list sibling, so it would otherwise be absorbed as lazy
1173            // paragraph text — break the item instead so a new list can start.
1174            if leading_indent_columns(lines[cursor].text) < marker.content_indent
1175                && !same_list_marker_line(lines[cursor].text, first_marker)
1176                && list_marker_info(lines[cursor].text).is_some()
1177            {
1178                break;
1179            }
1180
1181            if leading_indent_columns(lines[cursor].text) < marker.content_indent {
1182                if likely_block_start(lines[cursor].text, options) || !paragraph_open {
1183                    break;
1184                }
1185            }
1186
1187            // A line dedented below the item's content start only stays in the
1188            // item as a lazy paragraph continuation (it reached here because a
1189            // paragraph was open). Mark it lazy so the re-parse keeps it as
1190            // paragraph text rather than letting a stripped `- e`/`> q`/`# h`
1191            // begin a fresh block inside the item.
1192            let lazy = paragraph_open
1193                && leading_indent_columns(lines[cursor].text) < marker.content_indent;
1194            let stripped = strip_list_continuation(
1195                lines[cursor].text,
1196                marker.content_indent,
1197                first_marker.indent,
1198            );
1199            let starts_table = last_content_line.as_deref().is_some_and(|previous| {
1200                table_can_start_source(
1201                    previous,
1202                    &stripped,
1203                    options.constructs.indented_code,
1204                    options.constructs.spoiler,
1205                )
1206            });
1207            paragraph_open = if starts_table {
1208                false
1209            } else {
1210                list_item_paragraph_stays_open(Some(paragraph_open), &stripped, options)
1211            };
1212            push_line(&mut content, &stripped);
1213            lazy_flags.push(lazy);
1214            update_list_item_fence(&stripped, &mut open_fence);
1215            last_content_line = Some(stripped.into_owned());
1216            item_end = cursor;
1217            cursor += 1;
1218        }
1219
1220        let child_base = lines[item_start].start + marker.content_indent;
1221        if !lines[item_end].eol.is_empty() && !ends_with_line_ending(&content) {
1222            content.push_str(lines[item_end].eol);
1223        }
1224        if container_closed_after_unclosed_fence(lines, cursor, item_end, &content, options) {
1225            content.push('\n');
1226        }
1227        let mut child_lines = collect_lines(&content, child_base);
1228        for (child, &lazy) in child_lines.iter_mut().zip(lazy_flags.iter()) {
1229            child.lazy = lazy;
1230        }
1231        let mut children =
1232            parse_blocks_from_lines(&child_lines, false, options, definitions, diagnostics);
1233        let checked = if options.constructs.gfm_task_list_item {
1234            take_task_marker_from_children(&mut children)
1235        } else {
1236            None
1237        };
1238
1239        if item_tight
1240            && blank_separates_top_level_blocks(&item_blank_offsets, &children, child_base)
1241        {
1242            item_tight = false;
1243        }
1244        tight = tight && item_tight;
1245        items.push(ListItem {
1246            meta: NodeMeta::new(Some(Span::new(
1247                lines[item_start].start,
1248                lines[item_end].end_with_eol,
1249            ))),
1250            checked,
1251            children,
1252        });
1253    }
1254
1255    Some((
1256        Block::List(List {
1257            meta: NodeMeta::new(Some(Span::new(
1258                lines[index].start,
1259                lines[cursor - 1].end_with_eol,
1260            ))),
1261            ordered: first_marker.ordered,
1262            start: first_marker.start,
1263            delimiter: first_marker.delimiter,
1264            tight,
1265            children: items,
1266        }),
1267        cursor,
1268    ))
1269}
1270
1271/// Whether an item-internal blank line directly separates two of the item's own
1272/// top-level block children — which loosens the list. A blank loosens the item
1273/// when some top-level child STARTS after the blank: that child was split off
1274/// from the preceding content by the blank. A blank with no top-level child
1275/// starting after it was either trailing or absorbed into a nested container
1276/// (e.g. a sublist), so it does not loosen the outer list — CommonMark only
1277/// counts blank lines between blocks the item *directly* contains, and per-list
1278/// tightness keeps a sublist's internal blank from propagating outward.
1279///
1280/// Blank offsets and child spans share the `child_base` content origin (both
1281/// were produced from the same stripped item content), so the comparison is in
1282/// one coordinate space.
1283fn blank_separates_top_level_blocks(
1284    blank_offsets: &[usize],
1285    children: &[Block],
1286    child_base: usize,
1287) -> bool {
1288    if blank_offsets.is_empty() || children.len() < 2 {
1289        return false;
1290    }
1291    let Some(&first_blank) = blank_offsets.iter().min() else {
1292        return false;
1293    };
1294    children.iter().any(|child| {
1295        block_span(child).is_some_and(|span| span.start.saturating_sub(child_base) > first_blank)
1296    })
1297}
1298
1299fn block_span(block: &Block) -> Option<Span> {
1300    let meta = match block {
1301        Block::Paragraph(node) => &node.meta,
1302        Block::Heading(node) => &node.meta,
1303        Block::ThematicBreak(node) => &node.meta,
1304        Block::BlockQuote(node) => &node.meta,
1305        Block::Alert(node) => &node.meta,
1306        Block::List(node) => &node.meta,
1307        Block::DescriptionList(node) => &node.meta,
1308        Block::CodeBlock(node) => &node.meta,
1309        Block::HtmlBlock(node) => &node.meta,
1310        Block::Definition(node) => &node.meta,
1311        Block::FootnoteDefinition(node) => &node.meta,
1312        Block::Table(node) => &node.meta,
1313        Block::MathBlock(node) => &node.meta,
1314        Block::Frontmatter(node) => &node.meta,
1315        Block::MdxEsm(node) => &node.meta,
1316        Block::MdxExpression(node) => &node.meta,
1317        Block::MdxJsx(node) => &node.meta,
1318        Block::LeafDirective(node) => &node.meta,
1319        Block::ContainerDirective(node) => &node.meta,
1320    };
1321    meta.span
1322}
1323
1324fn list_item_paragraph_stays_open(
1325    previous_open: Option<bool>,
1326    line: &str,
1327    options: &SyntaxOptions,
1328) -> bool {
1329    if line.trim().is_empty() {
1330        return false;
1331    }
1332    if previous_open == Some(false) {
1333        return false;
1334    }
1335    block_quote_content_paragraph_open(line, options)
1336}
1337
/// Parses a description list — term lines followed by `:`/`~` detail markers —
/// that starts at `lines[index]`.
///
/// Returns `None` when the `description_list` construct is disabled, when the
/// first line is not a term line, when no item could be collected, or when
/// `parse_description_details` rejects one of the details. On success returns
/// the `Block::DescriptionList` and the index at which the caller should
/// resume.
fn parse_description_list(
    lines: &[Line<'_>],
    index: usize,
    options: &SyntaxOptions,
    definitions: &[String],
    diagnostics: &mut Vec<Diagnostic>,
) -> Option<(Block, usize)> {
    if !options.constructs.description_list || !is_description_term_line(lines[index].text, options)
    {
        return None;
    }

    let mut cursor = index;
    let mut items = Vec::new();
    let mut tight = true;
    // End offset of the list span; advanced to each completed item's end.
    let mut list_end = lines[index].end_with_eol;

    // One iteration per item: a term (possibly several lines) plus its details.
    while cursor < lines.len() {
        if !is_description_term_line(lines[cursor].text, options) {
            break;
        }
        let Some(term) = description_term(lines, cursor, options) else {
            break;
        };
        let term_line = lines[cursor];
        let mut details = Vec::new();
        let item_start = term_line.start;
        let mut item_end = lines[term.term_end].end_with_eol;
        // A blank line between the term and its first marker loosens the list.
        tight = tight && !term.blank_after_term;
        // `description_term` only succeeds when `marker_index` is in bounds and
        // points at a marker line, so `lines[cursor]` below is valid.
        cursor = term.marker_index;

        // One iteration per detail (`:`/`~` marker) of the current term.
        loop {
            let Some(marker) = description_marker(lines[cursor].text) else {
                break;
            };
            let (detail, next, detail_tight) = parse_description_details(
                lines,
                cursor,
                marker,
                options,
                definitions,
                diagnostics,
            )?;
            tight = tight && detail_tight;
            item_end = detail
                .meta
                .span
                .map(|span| span.end)
                .unwrap_or(lines[cursor].end_with_eol);
            details.push(detail);
            cursor = next;

            // Another marker for the same term may follow, possibly after
            // blank lines; skipping blanks to reach it loosens the list.
            let next_nonblank = next_nonblank_line(lines, cursor);
            if next_nonblank < lines.len()
                && description_marker(lines[next_nonblank].text).is_some()
            {
                if next_nonblank != cursor {
                    tight = false;
                }
                cursor = next_nonblank;
                continue;
            }
            break;
        }

        if details.is_empty() {
            return None;
        }
        list_end = item_end;
        items.push(DescriptionItem {
            meta: NodeMeta::new(Some(Span::new(item_start, item_end))),
            term: parse_inlines(
                &term.source,
                term.source_offset,
                options,
                definitions,
                diagnostics,
            ),
            details,
        });

        // Look past any blank lines for the next term. Reaching it across a
        // blank loosens the list; anything else ends the list, with the cursor
        // left on the first non-blank line (or at the end of input).
        let next_item = next_nonblank_line(lines, cursor);
        if next_item >= lines.len() {
            cursor = next_item;
            break;
        }
        if description_term(lines, next_item, options).is_some() {
            if next_item != cursor {
                tight = false;
            }
            cursor = next_item;
            continue;
        }
        cursor = next_item;
        break;
    }

    (!items.is_empty()).then_some((
        Block::DescriptionList(DescriptionList {
            meta: NodeMeta::new(Some(Span::new(lines[index].start, list_end))),
            tight,
            children: items,
        }),
        cursor,
    ))
}
1444
/// Collects one description detail — the marker line at `lines[index]` plus its
/// continuation lines — and parses the collected text as child blocks.
///
/// `marker` is the already-parsed marker of `lines[index]`. Returns the detail,
/// the index at which the caller should resume, and whether this detail leaves
/// the list tight. Returns `None` when the collected content is empty or
/// whitespace-only.
fn parse_description_details(
    lines: &[Line<'_>],
    index: usize,
    marker: DescriptionMarker<'_>,
    options: &SyntaxOptions,
    definitions: &[String],
    diagnostics: &mut Vec<Diagnostic>,
) -> Option<(DescriptionDetails, usize, bool)> {
    let mut content = String::new();
    push_line(&mut content, marker.content);
    let mut cursor = index + 1;
    // End offset of the detail span; advanced as lines are absorbed.
    let mut end = lines[index].end_with_eol;
    let mut tight = true;
    // Whether an unindented line may still join as a lazy continuation.
    let mut paragraph_open = paragraph_stays_open(marker.content, options);

    while cursor < lines.len() {
        if lines[cursor].text.trim().is_empty() {
            let next = next_nonblank_line(lines, cursor + 1);
            // A blank that merely separates this definition from a following
            // `:`/`~` marker (another definition of the SAME term) is
            // content-separating, so it loosens the list. A blank that ends the
            // item — because the next non-blank line begins a new TERM, or the
            // document ends — is just an item boundary and must NOT loosen the
            // list (such blank-separated term groups stay tight).
            if next >= lines.len() || description_term(lines, next, options).is_some() {
                cursor = next;
                break;
            }
            if description_marker(lines[next].text).is_some() {
                tight = false;
                cursor = next;
                break;
            }
            // The next non-blank line is not indented as a continuation: the
            // detail ends, with the cursor left on the blank line.
            if strip_indent_continuation(lines[next].text).is_none() {
                break;
            }
            // Otherwise the blank is interior to the detail: keep it, close
            // any open paragraph, and mark the list loose.
            push_line(&mut content, "");
            paragraph_open = false;
            tight = false;
            end = lines[cursor].end_with_eol;
            cursor += 1;
            continue;
        }

        // A new marker or a new term ends this detail.
        if description_marker(lines[cursor].text).is_some()
            || description_term(lines, cursor, options).is_some()
        {
            break;
        }

        // Accept either an indented continuation (one indent level removed) or,
        // while a paragraph is open, a lazy line that does not start a block.
        let continuation = if let Some(continuation) = strip_indent_continuation(lines[cursor].text)
        {
            continuation
        } else if paragraph_open && !likely_block_start(lines[cursor].text, options) {
            trim_ascii_start(lines[cursor].text)
        } else {
            break;
        };
        paragraph_open = paragraph_stays_open(continuation, options);
        push_line(&mut content, continuation);
        end = lines[cursor].end_with_eol;
        cursor += 1;
    }

    if content.trim().is_empty() {
        return None;
    }

    Some((
        DescriptionDetails {
            meta: NodeMeta::new(Some(Span::new(lines[index].start, end))),
            children: parse_blocks(
                &content,
                lines[index].start + marker.content_offset,
                false,
                options,
                definitions,
                diagnostics,
            ),
        },
        cursor,
        tight,
    ))
}
1529
1530fn description_term(
1531    lines: &[Line<'_>],
1532    term_index: usize,
1533    options: &SyntaxOptions,
1534) -> Option<DescriptionTerm> {
1535    if term_index >= lines.len() || !is_description_term_line(lines[term_index].text, options) {
1536        return None;
1537    }
1538    let mut source = String::new();
1539    let mut term_end = term_index;
1540    let mut cursor = term_index;
1541    while cursor < lines.len() && is_description_term_line(lines[cursor].text, options) {
1542        if !source.is_empty() {
1543            source.push('\n');
1544        }
1545        source.push_str(trim_ascii_start(lines[cursor].text).trim_end());
1546        term_end = cursor;
1547        cursor += 1;
1548    }
1549
1550    let mut marker_index = cursor;
1551    let mut blank_after_term = false;
1552    while marker_index < lines.len() && lines[marker_index].text.trim().is_empty() {
1553        blank_after_term = true;
1554        marker_index += 1;
1555    }
1556    (marker_index < lines.len() && description_marker(lines[marker_index].text).is_some()).then(
1557        || DescriptionTerm {
1558            marker_index,
1559            term_end,
1560            blank_after_term,
1561            source,
1562            source_offset: lines[term_index].start + leading_trim_bytes(lines[term_index].text),
1563        },
1564    )
1565}
1566
1567fn is_description_term_line(line: &str, options: &SyntaxOptions) -> bool {
1568    leading_indent_columns(line) <= 3
1569        && !line.trim().is_empty()
1570        && description_marker(line).is_none()
1571        && !likely_block_start(line, options)
1572}
1573
1574fn description_marker(line: &str) -> Option<DescriptionMarker<'_>> {
1575    let (columns, bytes) = leading_indent(line);
1576    if columns > 2 || !matches!(line.as_bytes().get(bytes), Some(b':' | b'~')) {
1577        return None;
1578    }
1579    if line
1580        .as_bytes()
1581        .get(bytes + 1)
1582        .is_some_and(|byte| !matches!(*byte, b' ' | b'\t'))
1583    {
1584        return None;
1585    }
1586    let mut content_offset = bytes + 1;
1587    while line
1588        .as_bytes()
1589        .get(content_offset)
1590        .is_some_and(|byte| matches!(*byte, b' ' | b'\t'))
1591    {
1592        content_offset += 1;
1593    }
1594    Some(DescriptionMarker {
1595        content_offset,
1596        content: &line[content_offset..],
1597    })
1598}
1599
1600/// A paragraph inside an indent-continuation container (footnote/description
1601/// detail) keeps absorbing the next line as long as it is non-blank and does
1602/// not itself begin a new block.
1603fn paragraph_stays_open(line: &str, options: &SyntaxOptions) -> bool {
1604    !line.trim().is_empty() && !likely_block_start(line, options)
1605}
1606
/// Removes exactly one indent-continuation level from the front of `input`:
/// four literal spaces, or failing that a single tab. Returns `None` when the
/// line starts with neither.
fn strip_indent_continuation(input: &str) -> Option<&str> {
    if let Some(rest) = input.strip_prefix("    ") {
        return Some(rest);
    }
    input.strip_prefix('\t')
}
1613
1614fn parse_atx_heading(
1615    line: Line<'_>,
1616    options: &SyntaxOptions,
1617    definitions: &[String],
1618) -> Option<Block> {
1619    let text = trim_up_to_three_spaces(line.text)?;
1620    let depth = text
1621        .as_bytes()
1622        .iter()
1623        .take_while(|byte| **byte == b'#')
1624        .count();
1625    if depth == 0 || depth > 6 {
1626        return None;
1627    }
1628    if text
1629        .as_bytes()
1630        .get(depth)
1631        .is_some_and(|byte| !matches!(*byte, b' ' | b'\t'))
1632        && text.len() != depth
1633    {
1634        return None;
1635    }
1636    let after_opening = &text[depth..];
1637    let content_start_in_text = depth + leading_trim_bytes(after_opening);
1638    let content = trim_closing_hashes(after_opening.trim_start());
1639    let content_start = line.start + (line.text.len() - text.len()) + content_start_in_text;
1640    Some(Block::Heading(Heading {
1641        meta: NodeMeta::new(Some(Span::new(line.start, line.end))),
1642        depth: depth as u8,
1643        kind: HeadingKind::Atx,
1644        children: parse_inlines(
1645            content,
1646            content_start,
1647            options,
1648            definitions,
1649            &mut Vec::new(),
1650        ),
1651    }))
1652}
1653
1654fn parse_thematic_break(line: Line<'_>) -> Option<Block> {
1655    let text = trim_up_to_three_spaces(line.text)?.trim();
1656    let mut marker = None;
1657    let mut count = 0;
1658    for char in text.chars() {
1659        if char == ' ' || char == '\t' {
1660            continue;
1661        }
1662        let current = match char {
1663            '-' => ThematicBreakMarker::Dash,
1664            '*' => ThematicBreakMarker::Asterisk,
1665            '_' => ThematicBreakMarker::Underscore,
1666            _ => return None,
1667        };
1668        if marker.is_some_and(|marker| marker != current) {
1669            return None;
1670        }
1671        marker = Some(current);
1672        count += 1;
1673    }
1674    if count >= 3 {
1675        Some(Block::ThematicBreak(ThematicBreak {
1676            meta: NodeMeta::new(Some(Span::new(line.start, line.end))),
1677            marker: marker?,
1678        }))
1679    } else {
1680        None
1681    }
1682}
1683
1684fn parse_definition(
1685    lines: &[Line<'_>],
1686    index: usize,
1687    options: &SyntaxOptions,
1688    allow_subsequent_indent: bool,
1689) -> Option<(Block, usize)> {
1690    let line = lines[index];
1691    let text = trim_definition_start(line.text, allow_subsequent_indent)?;
1692    if !text.starts_with('[') {
1693        return None;
1694    }
1695
1696    // A reference-definition label may span several lines (CommonMark §4.7): the
1697    // `]:` closing the label can appear on a later line. Accumulate continuation
1698    // lines until the label closes, stopping at a blank line or end of input (a
1699    // blank line cannot occur inside a label). The first line's <=3-space indent
1700    // is already stripped by `trim_up_to_three_spaces`; continuation lines are
1701    // appended verbatim, and `normalize_label` collapses the interior newlines and
1702    // surrounding whitespace when the label is matched.
1703    let mut accumulated = String::from(text);
1704    let mut label_end_line = index;
1705    let close = loop {
1706        if let Some(close) = find_reference_label_end(&accumulated, 0) {
1707            if accumulated.as_bytes().get(close + 1) == Some(&b':') {
1708                break close;
1709            }
1710            // A closed label not followed by `:` is not a definition.
1711            return None;
1712        }
1713        let next = label_end_line + 1;
1714        if next >= lines.len() || lines[next].text.trim().is_empty() {
1715            return None;
1716        }
1717        // The unclosed label behaves like an open paragraph: a continuation line
1718        // that itself begins a block construct (a setext underline, or a GFM table
1719        // header/delimiter pair) interrupts it, so the definition fails and the
1720        // lines are re-parsed as blocks (CommonMark/GFM prefer setext headings,
1721        // thematic breaks, fenced code, and tables over a label that has not yet
1722        // closed — e.g. `[\na\n=\n]: b` or `[\na\n:-\n]: b`).
1723        if likely_block_start(lines[next].text, options)
1724            || setext_underline_depth(lines[next].text).is_some()
1725            || table_can_start(lines, next, options)
1726        {
1727            return None;
1728        }
1729        accumulated.push('\n');
1730        accumulated.push_str(lines[next].text);
1731        label_end_line = next;
1732    };
1733    let label = String::from(&accumulated[1..close]);
1734    if normalize_label(&label).is_empty() {
1735        return None;
1736    }
1737    let label = label.as_str();
1738    let mut source = String::from(&accumulated[close + 2..]);
1739    let mut cursor = label_end_line;
1740    let mut best_without_title = None;
1741
1742    loop {
1743        if let Some(resource) = parse_definition_destination_title(&source) {
1744            if resource.title.is_some() {
1745                return Some((
1746                    Block::Definition(Definition {
1747                        meta: NodeMeta::new(Some(Span::new(
1748                            line.start,
1749                            lines[cursor].end_with_eol,
1750                        ))),
1751                        label: label.into(),
1752                        identifier: normalize_label(label),
1753                        destination: resource.destination,
1754                        destination_kind: resource.destination_kind,
1755                        title: resource.title,
1756                        title_kind: resource.title_kind,
1757                    }),
1758                    cursor + 1,
1759                ));
1760            }
1761
1762            best_without_title = Some((resource, cursor + 1));
1763            let next = cursor + 1;
1764            if next >= lines.len()
1765                || lines[next].text.trim().is_empty()
1766                || !line_can_start_definition_title(lines[next].text)
1767            {
1768                break;
1769            }
1770        }
1771
1772        let next = cursor + 1;
1773        if next >= lines.len() || lines[next].text.trim().is_empty() {
1774            break;
1775        }
1776        // A continuation line that itself begins a block-level construct (or a
1777        // setext underline) cannot be swallowed into the definition's pending,
1778        // not-yet-closed title: such a line interrupts the would-be paragraph, so
1779        // the definition fails and the lines are re-parsed as blocks (e.g.
1780        // `[a]: b '` then `***` is a paragraph + thematic break, not a title).
1781        if likely_block_start(lines[next].text, options)
1782            || setext_underline_depth(lines[next].text).is_some()
1783        {
1784            break;
1785        }
1786        source.push('\n');
1787        source.push_str(lines[next].text);
1788        cursor = next;
1789    }
1790
1791    let (resource, next) = best_without_title?;
1792    let end = lines[next - 1].end_with_eol;
1793    Some((
1794        Block::Definition(Definition {
1795            meta: NodeMeta::new(Some(Span::new(line.start, end))),
1796            label: label.into(),
1797            identifier: normalize_label(label),
1798            destination: resource.destination,
1799            destination_kind: resource.destination_kind,
1800            title: resource.title,
1801            title_kind: resource.title_kind,
1802        }),
1803        next,
1804    ))
1805}
1806
1807fn trim_definition_start(input: &str, allow_subsequent_indent: bool) -> Option<&str> {
1808    if let Some(trimmed) = trim_up_to_three_spaces(input) {
1809        return Some(trimmed);
1810    }
1811    if allow_subsequent_indent {
1812        let (columns, bytes) = leading_indent(input);
1813        if columns == 4 {
1814            return Some(&input[bytes..]);
1815        }
1816    }
1817    None
1818}
1819
1820fn parse_footnote_definition(
1821    lines: &[Line<'_>],
1822    index: usize,
1823    options: &SyntaxOptions,
1824    definitions: &[String],
1825    diagnostics: &mut Vec<Diagnostic>,
1826) -> Option<(Block, usize)> {
1827    if !options.constructs.footnote_definition {
1828        return None;
1829    }
1830    let line = lines[index];
1831    let text = line.text.trim();
1832    if !text.starts_with("[^") {
1833        return None;
1834    }
1835    let close = find_footnote_definition_label_end(text)?;
1836    let label = &text[2..close];
1837    if !is_footnote_label(label) {
1838        return None;
1839    }
1840    let rest = text[close + 2..].trim();
1841    let mut content = String::new();
1842    push_line(&mut content, rest);
1843    let mut cursor = index + 1;
1844    let mut end = line.end_with_eol;
1845    let mut paragraph_open = paragraph_stays_open(rest, options);
1846
1847    while cursor < lines.len() {
1848        if lines[cursor].text.trim().is_empty() {
1849            let next = next_nonblank_line(lines, cursor + 1);
1850            if next >= lines.len() || !is_footnote_continuation(lines[next].text) {
1851                break;
1852            }
1853            push_line(&mut content, "");
1854            paragraph_open = false;
1855            end = lines[cursor].end_with_eol;
1856            cursor += 1;
1857            continue;
1858        }
1859
1860        let continuation = if let Some(continuation) = strip_indent_continuation(lines[cursor].text)
1861        {
1862            continuation
1863        } else if paragraph_open && !likely_block_start(lines[cursor].text, options) {
1864            trim_ascii_start(lines[cursor].text)
1865        } else {
1866            break;
1867        };
1868        paragraph_open = paragraph_stays_open(continuation, options);
1869        push_line(&mut content, continuation);
1870        end = lines[cursor].end_with_eol;
1871        cursor += 1;
1872    }
1873
1874    Some((
1875        Block::FootnoteDefinition(FootnoteDefinition {
1876            meta: NodeMeta::new(Some(Span::new(line.start, end))),
1877            label: label.into(),
1878            identifier: normalize_label(label),
1879            children: parse_blocks(
1880                &content,
1881                line.end.saturating_sub(rest.len()),
1882                false,
1883                options,
1884                definitions,
1885                diagnostics,
1886            ),
1887        }),
1888        cursor,
1889    ))
1890}
1891
1892fn is_footnote_continuation(input: &str) -> bool {
1893    strip_indent_continuation(input).is_some()
1894}
1895
1896fn parse_leaf_directive(
1897    line: Line<'_>,
1898    options: &SyntaxOptions,
1899    definitions: &[String],
1900    diagnostics: &mut Vec<Diagnostic>,
1901) -> Option<Block> {
1902    if !options.constructs.directive_leaf {
1903        return None;
1904    }
1905    let trimmed = line.text.trim_start();
1906    if trimmed.starts_with(":::") || !trimmed.starts_with("::") {
1907        return None;
1908    }
1909    let opener_base = line.start + (line.text.len() - trimmed.len()) + 2;
1910    let Some((name, label_source, attributes, _)) = parse_directive_opener(&trimmed[2..]) else {
1911        diagnostics.push(Diagnostic::new(
1912            DiagnosticSeverity::Error,
1913            DiagnosticCode::InvalidDirectiveName,
1914            Span::new(line.start, line.end),
1915            "leaf directive must have a valid name",
1916        ));
1917        return None;
1918    };
1919    let label = label_source
1920        .map(|source| {
1921            parse_inlines(
1922                source,
1923                opener_base + name.len() + 1,
1924                options,
1925                definitions,
1926                diagnostics,
1927            )
1928        })
1929        .unwrap_or_default();
1930    Some(Block::LeafDirective(LeafDirective {
1931        meta: NodeMeta::new(Some(Span::new(line.start, line.end))),
1932        name,
1933        label,
1934        attributes,
1935    }))
1936}
1937
1938fn parse_html_block(
1939    lines: &[Line<'_>],
1940    index: usize,
1941    options: &SyntaxOptions,
1942) -> Option<(Block, usize)> {
1943    if !options.constructs.html_block {
1944        return None;
1945    }
1946
1947    let trimmed = trim_up_to_three_spaces(lines[index].text)?;
1948    let kind = html_block_start(trimmed)?;
1949    let mut value = String::new();
1950    let mut cursor = index;
1951    match kind {
1952        HtmlBlockKind::RawTag => {
1953            // CommonMark §4.6 type-1: the block ends on a line containing ANY of
1954            // `</script>`, `</pre>`, `</style>`, `</textarea>` (case-insensitive),
1955            // regardless of which opened it.
1956            while cursor < lines.len() {
1957                push_line(&mut value, lines[cursor].text);
1958                if ["script", "pre", "style", "textarea"]
1959                    .iter()
1960                    .any(|tag| line_contains_raw_closing_tag(lines[cursor].text, tag))
1961                {
1962                    cursor += 1;
1963                    break;
1964                }
1965                cursor += 1;
1966            }
1967        }
1968        HtmlBlockKind::BlockTag => {
1969            while cursor < lines.len() && !lines[cursor].text.trim().is_empty() {
1970                push_line(&mut value, lines[cursor].text);
1971                cursor += 1;
1972            }
1973        }
1974        HtmlBlockKind::Until(end) => {
1975            while cursor < lines.len() {
1976                push_line(&mut value, lines[cursor].text);
1977                if lines[cursor].text.contains(end) {
1978                    cursor += 1;
1979                    break;
1980                }
1981                cursor += 1;
1982            }
1983        }
1984        HtmlBlockKind::UntilBlank => {
1985            while cursor < lines.len() && !lines[cursor].text.trim().is_empty() {
1986                push_line(&mut value, lines[cursor].text);
1987                cursor += 1;
1988            }
1989        }
1990    }
1991    Some((
1992        Block::HtmlBlock(HtmlBlock {
1993            meta: NodeMeta::new(Some(Span::new(
1994                lines[index].start,
1995                lines[cursor - 1].end_with_eol,
1996            ))),
1997            value,
1998        }),
1999        cursor,
2000    ))
2001}
2002
2003fn html_block_start(input: &str) -> Option<HtmlBlockKind> {
2004    let trimmed = input.trim_end();
2005    if !trimmed.starts_with('<') {
2006        return None;
2007    }
2008
2009    if raw_html_tag_start(trimmed) {
2010        return Some(HtmlBlockKind::RawTag);
2011    }
2012    if trimmed.starts_with("<!--") {
2013        return Some(HtmlBlockKind::Until("-->"));
2014    }
2015    if trimmed.starts_with("<?") {
2016        return Some(HtmlBlockKind::Until("?>"));
2017    }
2018    if is_declaration_start(trimmed) {
2019        return Some(HtmlBlockKind::Until(">"));
2020    }
2021    if trimmed.starts_with("<![CDATA[") {
2022        return Some(HtmlBlockKind::Until("]]>"));
2023    }
2024
2025    if html_block_tag_start(trimmed) {
2026        return Some(HtmlBlockKind::BlockTag);
2027    }
2028
2029    let Some((end, _tag_name)) = parse_html_tag(trimmed, 0) else {
2030        return None;
2031    };
2032    let rest = trimmed[end..].trim();
2033    if rest.is_empty() {
2034        Some(HtmlBlockKind::UntilBlank)
2035    } else {
2036        None
2037    }
2038}
2039
2040pub(crate) fn line_starts_html_block(input: &str) -> bool {
2041    trim_up_to_three_spaces(input)
2042        .and_then(html_block_start)
2043        .is_some()
2044}
2045
2046fn raw_html_tag_start(input: &str) -> bool {
2047    for tag in ["script", "pre", "style", "textarea"] {
2048        if html_raw_open_tag_prefix(input, tag) {
2049            return true;
2050        }
2051    }
2052    false
2053}
2054
/// Reports whether `input` begins with an opening `<tag` (name compared
/// ASCII case-insensitively) that is properly delimited.
///
/// After the name the input must either end, continue with a space, tab,
/// line break or `>`, or consist of exactly `/>` and nothing more. Closing
/// tags (`</tag`) never match.
fn html_raw_open_tag_prefix(input: &str, tag: &str) -> bool {
    let Some(after_lt) = input.strip_prefix('<') else {
        return false;
    };
    if after_lt.starts_with('/') {
        return false;
    }

    let bytes = after_lt.as_bytes();
    let name_len = tag.len();
    if bytes.len() < name_len || !bytes[..name_len].eq_ignore_ascii_case(tag.as_bytes()) {
        return false;
    }

    // Whatever follows the name decides whether this really is that tag
    // rather than a longer name sharing the prefix.
    match &bytes[name_len..] {
        [] => true,
        [b' ' | b'\t' | b'\n' | b'\r' | b'>', ..] => true,
        [b'/', b'>'] => true,
        _ => false,
    }
}
2079
/// Reports whether `input` contains a closing tag `</tag>` anywhere on the
/// line. The name is compared ASCII case-insensitively and ASCII whitespace
/// is tolerated between the name and the final `>`.
fn line_contains_raw_closing_tag(input: &str, tag: &str) -> bool {
    let bytes = input.as_bytes();
    let name = tag.as_bytes();
    // Length of `</` plus the tag name.
    let opener_len = name.len() + 2;

    bytes.windows(opener_len).enumerate().any(|(at, window)| {
        let is_opener =
            window[0] == b'<' && window[1] == b'/' && window[2..].eq_ignore_ascii_case(name);
        if !is_opener {
            return false;
        }
        let after = &bytes[at + opener_len..];
        match after.first() {
            Some(b'>') => true,
            // Skip the run of whitespace and require `>` right behind it.
            Some(byte) if byte.is_ascii_whitespace() => {
                after.iter().find(|byte| !byte.is_ascii_whitespace()) == Some(&b'>')
            }
            _ => false,
        }
    })
}
2116
2117fn html_block_tag_start(input: &str) -> bool {
2118    let bytes = input.as_bytes();
2119    if bytes.first() != Some(&b'<') {
2120        return false;
2121    }
2122
2123    let mut cursor = 1;
2124    if bytes.get(cursor) == Some(&b'/') {
2125        cursor += 1;
2126    }
2127
2128    let name_start = cursor;
2129    if !bytes
2130        .get(cursor)
2131        .is_some_and(|byte| byte.is_ascii_alphabetic())
2132    {
2133        return false;
2134    }
2135    cursor += 1;
2136    while bytes.get(cursor).is_some_and(|byte| html_name_byte(*byte)) {
2137        cursor += 1;
2138    }
2139
2140    let name = &input[name_start..cursor];
2141    if !html_block_tag(name) {
2142        return false;
2143    }
2144
2145    match bytes.get(cursor) {
2146        None | Some(b' ' | b'\t' | b'\n' | b'\r' | b'>') => true,
2147        Some(b'/') if bytes.get(cursor + 1) == Some(&b'>') => true,
2148        _ => false,
2149    }
2150}
2151
/// Reports whether `tag` is one of the block-level HTML element names listed
/// below, compared ASCII case-insensitively.
fn html_block_tag(tag: &str) -> bool {
    // All entries are lowercase ASCII, so a case-insensitive comparison is
    // equivalent to lowercasing `tag` first.
    const BLOCK_TAGS: [&str; 62] = [
        "address",
        "article",
        "aside",
        "base",
        "basefont",
        "blockquote",
        "body",
        "caption",
        "center",
        "col",
        "colgroup",
        "dd",
        "details",
        "dialog",
        "dir",
        "div",
        "dl",
        "dt",
        "fieldset",
        "figcaption",
        "figure",
        "footer",
        "form",
        "frame",
        "frameset",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "head",
        "header",
        "hr",
        "html",
        "iframe",
        "legend",
        "li",
        "link",
        "main",
        "menu",
        "menuitem",
        "nav",
        "noframes",
        "ol",
        "optgroup",
        "option",
        "p",
        "param",
        "search",
        "section",
        "summary",
        "table",
        "tbody",
        "td",
        "tfoot",
        "th",
        "thead",
        "title",
        "tr",
        "track",
        "ul",
    ];
    BLOCK_TAGS
        .iter()
        .any(|known| known.eq_ignore_ascii_case(tag))
}
2219
/// Reports whether `input` begins with `<!` immediately followed by an ASCII
/// letter (for example `<!DOCTYPE`).
fn is_declaration_start(input: &str) -> bool {
    matches!(
        input.as_bytes(),
        [b'<', b'!', third, ..] if third.is_ascii_alphabetic()
    )
}
2226
2227fn parse_mdx_flow(
2228    lines: &[Line<'_>],
2229    index: usize,
2230    options: &SyntaxOptions,
2231    diagnostics: &mut Vec<Diagnostic>,
2232) -> Option<(Block, usize)> {
2233    if options.constructs.mdx_esm {
2234        if let Some((block, next)) = parse_mdx_esm_flow(lines, index, diagnostics) {
2235            return Some((block, next));
2236        }
2237    }
2238
2239    let line = lines[index];
2240    let trimmed = line.text.trim_start();
2241    if options.constructs.mdx_expression_block && trimmed.starts_with('{') {
2242        let open_byte = line.text.len() - trimmed.len();
2243        if let Some((close_line, close_byte)) = find_mdx_expression_close(lines, index, open_byte) {
2244            return Some((
2245                Block::MdxExpression(MdxExpression {
2246                    meta: NodeMeta::new(Some(Span::new(line.start, lines[close_line].end))),
2247                    value: collect_mdx_expression_value(
2248                        lines, index, open_byte, close_line, close_byte,
2249                    ),
2250                }),
2251                close_line + 1,
2252            ));
2253        }
2254        diagnostics.push(Diagnostic::new(
2255            DiagnosticSeverity::Error,
2256            DiagnosticCode::InvalidMdx,
2257            Span::new(line.start + open_byte, lines.last()?.end_with_eol),
2258            "MDX expression block is missing a closing brace",
2259        ));
2260    }
2261    if options.constructs.mdx_jsx_block && trimmed.starts_with('<') {
2262        if let Some(close_line) = find_mdx_jsx_close(lines, index) {
2263            return Some((
2264                Block::MdxJsx(MdxJsx {
2265                    meta: NodeMeta::new(Some(Span::new(line.start, lines[close_line].end))),
2266                    value: collect_line_range(lines, index, close_line),
2267                }),
2268                close_line + 1,
2269            ));
2270        }
2271        let start_byte = line.text.len() - trimmed.len();
2272        if let Some(root) = mdx_jsx_tag_start(line.text, start_byte) {
2273            if !root.closing {
2274                if let Some((_tag_end_line, _tag_end_byte, self_closing)) =
2275                    find_mdx_jsx_tag_end(lines, index, start_byte)
2276                {
2277                    if !self_closing {
2278                        diagnostics.push(Diagnostic::new(
2279                            DiagnosticSeverity::Error,
2280                            DiagnosticCode::InvalidMdx,
2281                            Span::new(line.start + start_byte, lines.last()?.end_with_eol),
2282                            "MDX JSX block is missing a closing tag",
2283                        ));
2284                    }
2285                }
2286            }
2287        }
2288    }
2289    None
2290}
2291
/// Scanner state carried from one line to the next while reading an MDX ESM
/// (`import`/`export`) block. It is updated by `update_mdx_esm_state` and
/// inspected by `state_has_open_mdx_esm_construct`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
struct MdxEsmState {
    /// Number of `{` seen so far that have not been matched by a `}`.
    brace_depth: usize,
    /// Number of `[` seen so far that have not been matched by a `]`.
    bracket_depth: usize,
    /// Number of `(` seen so far that have not been matched by a `)`.
    paren_depth: usize,
    /// True while inside a `/* … */` comment that has not been closed yet.
    block_comment: bool,
    /// Delimiter byte (`'`, `"` or a backtick) of the string currently open,
    /// or `None` when the scanner is outside any string.
    quote: Option<u8>,
    /// True when the previous byte inside a string was an unescaped
    /// backslash, so the next byte must not be treated as a delimiter.
    escaped: bool,
}
2301
/// Lexical context tracked while searching for the `}` that closes an MDX
/// expression, so that braces inside strings and comments are not counted.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum MdxBraceState {
    /// Plain code: braces are counted and string/comment openers recognised.
    Normal,
    /// Inside a `'…'` string.
    SingleQuoted,
    /// Inside a `"…"` string.
    DoubleQuoted,
    /// Inside a backtick-delimited template string.
    Template,
    /// Inside a `//` comment.
    LineComment,
    /// Inside a `/* … */` comment.
    BlockComment,
}
2311
/// Identity of a JSX tag recognised by the MDX JSX scanner.
// NOTE(review): the variant docs below are inferred from the names; the code
// that constructs these values is outside this view — confirm.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum MdxJsxTag<'a> {
    /// Presumably a nameless fragment tag (`<>` / `</>`).
    Fragment,
    /// A tag with a name; the slice presumably borrows the name from the
    /// scanned line.
    Named(&'a str),
}
2317
/// Result of recognising the start of a JSX tag (returned by
/// `mdx_jsx_tag_start`).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct MdxJsxTagStart<'a> {
    /// Which tag was recognised.
    tag: MdxJsxTag<'a>,
    /// Callers refuse to treat a tag with this flag set as the root of a JSX
    /// block; presumably true for closing tags (`</name>`) — confirm against
    /// `mdx_jsx_tag_start`.
    closing: bool,
}
2323
2324fn parse_mdx_esm_flow(
2325    lines: &[Line<'_>],
2326    index: usize,
2327    diagnostics: &mut Vec<Diagnostic>,
2328) -> Option<(Block, usize)> {
2329    if !is_mdx_esm_start(lines[index].text) {
2330        return None;
2331    }
2332
2333    let mut value = String::new();
2334    let mut state = MdxEsmState::default();
2335    let mut cursor = index;
2336    while cursor < lines.len() {
2337        let line = lines[cursor].text;
2338        if cursor > index && !is_mdx_esm_continuation(line, &state) {
2339            break;
2340        }
2341        if cursor > index {
2342            value.push('\n');
2343        }
2344        value.push_str(line);
2345        update_mdx_esm_state(line, &mut state);
2346        cursor += 1;
2347    }
2348    if cursor >= lines.len() && state_has_open_mdx_esm_construct(&state) {
2349        diagnostics.push(Diagnostic::new(
2350            DiagnosticSeverity::Error,
2351            DiagnosticCode::InvalidMdx,
2352            Span::new(lines[index].start, lines[cursor - 1].end_with_eol),
2353            "MDX ESM block is missing a closing delimiter",
2354        ));
2355    }
2356
2357    Some((
2358        Block::MdxEsm(MdxEsm {
2359            meta: NodeMeta::new(Some(Span::new(lines[index].start, lines[cursor - 1].end))),
2360            value,
2361        }),
2362        cursor,
2363    ))
2364}
2365
/// Reports whether `line` opens an ESM statement: it must begin, with no
/// leading whitespace, with `import` or `export` followed by a space.
fn is_mdx_esm_start(line: &str) -> bool {
    ["import ", "export "]
        .iter()
        .any(|keyword| line.starts_with(*keyword))
}
2369
2370fn is_mdx_esm_continuation(line: &str, state: &MdxEsmState) -> bool {
2371    if state_has_open_mdx_esm_construct(state) {
2372        return true;
2373    }
2374    let trimmed = line.trim_start();
2375    if trimmed.is_empty() {
2376        return false;
2377    }
2378    is_mdx_esm_start(line) || trimmed.starts_with("//") || trimmed.starts_with("/*")
2379}
2380
2381fn state_has_open_mdx_esm_construct(state: &MdxEsmState) -> bool {
2382    state.brace_depth > 0
2383        || state.bracket_depth > 0
2384        || state.paren_depth > 0
2385        || state.block_comment
2386        || state.quote == Some(b'`')
2387}
2388
2389fn update_mdx_esm_state(line: &str, state: &mut MdxEsmState) {
2390    let bytes = line.as_bytes();
2391    let mut index = 0;
2392    while index < bytes.len() {
2393        let byte = bytes[index];
2394        if state.block_comment {
2395            if byte == b'*' && bytes.get(index + 1) == Some(&b'/') {
2396                state.block_comment = false;
2397                index += 1;
2398            }
2399            index += 1;
2400            continue;
2401        }
2402
2403        if let Some(delimiter) = state.quote {
2404            if state.escaped {
2405                state.escaped = false;
2406            } else if byte == b'\\' {
2407                state.escaped = true;
2408            } else if byte == delimiter {
2409                state.quote = None;
2410            }
2411            index += 1;
2412            continue;
2413        }
2414
2415        match byte {
2416            b'\'' | b'"' | b'`' => state.quote = Some(byte),
2417            b'/' if bytes.get(index + 1) == Some(&b'/') => break,
2418            b'/' if bytes.get(index + 1) == Some(&b'*') => {
2419                state.block_comment = true;
2420                index += 1;
2421            }
2422            b'{' => state.brace_depth += 1,
2423            b'}' => state.brace_depth = state.brace_depth.saturating_sub(1),
2424            b'[' => state.bracket_depth += 1,
2425            b']' => state.bracket_depth = state.bracket_depth.saturating_sub(1),
2426            b'(' => state.paren_depth += 1,
2427            b')' => state.paren_depth = state.paren_depth.saturating_sub(1),
2428            _ => {}
2429        }
2430        index += 1;
2431    }
2432}
2433
2434fn find_mdx_expression_close(
2435    lines: &[Line<'_>],
2436    index: usize,
2437    open_byte: usize,
2438) -> Option<(usize, usize)> {
2439    let mut depth = 0usize;
2440    let mut state = MdxBraceState::Normal;
2441    let mut escaped = false;
2442    let mut cursor = index;
2443
2444    while cursor < lines.len() {
2445        let bytes = lines[cursor].text.as_bytes();
2446        let mut byte_index = if cursor == index { open_byte } else { 0 };
2447        while byte_index < bytes.len() {
2448            let byte = bytes[byte_index];
2449            match state {
2450                MdxBraceState::Normal => match byte {
2451                    b'\'' => state = MdxBraceState::SingleQuoted,
2452                    b'"' => state = MdxBraceState::DoubleQuoted,
2453                    b'`' => state = MdxBraceState::Template,
2454                    b'/' if bytes.get(byte_index + 1) == Some(&b'/') => {
2455                        state = MdxBraceState::LineComment;
2456                        break;
2457                    }
2458                    b'/' if bytes.get(byte_index + 1) == Some(&b'*') => {
2459                        state = MdxBraceState::BlockComment;
2460                        byte_index += 1;
2461                    }
2462                    b'{' => depth += 1,
2463                    b'}' => {
2464                        depth = depth.checked_sub(1)?;
2465                        if depth == 0 {
2466                            return lines[cursor].text[byte_index + 1..]
2467                                .trim()
2468                                .is_empty()
2469                                .then_some((cursor, byte_index));
2470                        }
2471                    }
2472                    _ => {}
2473                },
2474                MdxBraceState::SingleQuoted => {
2475                    update_mdx_quote_state(byte, b'\'', &mut state, &mut escaped);
2476                }
2477                MdxBraceState::DoubleQuoted => {
2478                    update_mdx_quote_state(byte, b'"', &mut state, &mut escaped);
2479                }
2480                MdxBraceState::Template => {
2481                    update_mdx_quote_state(byte, b'`', &mut state, &mut escaped);
2482                }
2483                MdxBraceState::LineComment => break,
2484                MdxBraceState::BlockComment => {
2485                    if byte == b'*' && bytes.get(byte_index + 1) == Some(&b'/') {
2486                        state = MdxBraceState::Normal;
2487                        byte_index += 1;
2488                    }
2489                }
2490            }
2491            byte_index += 1;
2492        }
2493        if state == MdxBraceState::LineComment {
2494            state = MdxBraceState::Normal;
2495        }
2496        cursor += 1;
2497    }
2498
2499    None
2500}
2501
2502fn update_mdx_quote_state(byte: u8, delimiter: u8, state: &mut MdxBraceState, escaped: &mut bool) {
2503    if *escaped {
2504        *escaped = false;
2505        return;
2506    }
2507    if byte == b'\\' {
2508        *escaped = true;
2509        return;
2510    }
2511    if byte == delimiter {
2512        *state = MdxBraceState::Normal;
2513    }
2514}
2515
2516fn find_mdx_expression_inline_close(input: &str, open_byte: usize) -> Option<usize> {
2517    let bytes = input.as_bytes();
2518    if bytes.get(open_byte) != Some(&b'{') {
2519        return None;
2520    }
2521
2522    let mut depth = 0usize;
2523    let mut state = MdxBraceState::Normal;
2524    let mut escaped = false;
2525    let mut cursor = open_byte;
2526    while cursor < bytes.len() {
2527        let byte = bytes[cursor];
2528        match state {
2529            MdxBraceState::Normal => match byte {
2530                b'\'' => state = MdxBraceState::SingleQuoted,
2531                b'"' => state = MdxBraceState::DoubleQuoted,
2532                b'`' => state = MdxBraceState::Template,
2533                b'/' if bytes.get(cursor + 1) == Some(&b'/') => {
2534                    state = MdxBraceState::LineComment;
2535                    cursor += 1;
2536                }
2537                b'/' if bytes.get(cursor + 1) == Some(&b'*') => {
2538                    state = MdxBraceState::BlockComment;
2539                    cursor += 1;
2540                }
2541                b'{' => depth += 1,
2542                b'}' => {
2543                    depth = depth.checked_sub(1)?;
2544                    if depth == 0 {
2545                        return Some(cursor);
2546                    }
2547                }
2548                _ => {}
2549            },
2550            MdxBraceState::SingleQuoted => {
2551                update_mdx_quote_state(byte, b'\'', &mut state, &mut escaped);
2552            }
2553            MdxBraceState::DoubleQuoted => {
2554                update_mdx_quote_state(byte, b'"', &mut state, &mut escaped);
2555            }
2556            MdxBraceState::Template => {
2557                update_mdx_quote_state(byte, b'`', &mut state, &mut escaped);
2558            }
2559            MdxBraceState::LineComment => {
2560                if byte == b'\n' {
2561                    state = MdxBraceState::Normal;
2562                }
2563            }
2564            MdxBraceState::BlockComment => {
2565                if byte == b'*' && bytes.get(cursor + 1) == Some(&b'/') {
2566                    state = MdxBraceState::Normal;
2567                    cursor += 1;
2568                }
2569            }
2570        }
2571        cursor += 1;
2572    }
2573    None
2574}
2575
2576fn collect_mdx_expression_value(
2577    lines: &[Line<'_>],
2578    start_line: usize,
2579    open_byte: usize,
2580    close_line: usize,
2581    close_byte: usize,
2582) -> String {
2583    let mut value = String::new();
2584    let mut cursor = start_line;
2585    while cursor <= close_line {
2586        if cursor > start_line {
2587            value.push('\n');
2588        }
2589        let line = lines[cursor].text;
2590        let segment = if cursor == start_line && cursor == close_line {
2591            &line[open_byte + 1..close_byte]
2592        } else if cursor == start_line {
2593            &line[open_byte + 1..]
2594        } else if cursor == close_line {
2595            &line[..close_byte]
2596        } else {
2597            line
2598        };
2599        value.push_str(segment);
2600        cursor += 1;
2601    }
2602    value
2603}
2604
2605fn find_mdx_jsx_close<'a>(lines: &'a [Line<'a>], index: usize) -> Option<usize> {
2606    let line = lines[index];
2607    let trimmed = line.text.trim_start();
2608    let start_byte = line.text.len() - trimmed.len();
2609    let root = mdx_jsx_tag_start(line.text, start_byte)?;
2610    if root.closing {
2611        return None;
2612    }
2613
2614    let (mut cursor_line, mut cursor_byte, self_closing) =
2615        find_mdx_jsx_tag_end(lines, index, start_byte)?;
2616    if self_closing {
2617        return Some(cursor_line);
2618    }
2619
2620    let mut depth = 1usize;
2621    cursor_byte += 1;
2622    'scan: while cursor_line < lines.len() {
2623        let line = lines[cursor_line].text;
2624        while cursor_byte < line.len() {
2625            let Some(relative_start) = line[cursor_byte..].find('<') else {
2626                break;
2627            };
2628            let tag_start_byte = cursor_byte + relative_start;
2629            let Some(candidate) = mdx_jsx_tag_start(line, tag_start_byte) else {
2630                cursor_byte = tag_start_byte + 1;
2631                continue;
2632            };
2633            let Some((tag_end_line, tag_end_byte, candidate_self_closing)) =
2634                find_mdx_jsx_tag_end(lines, cursor_line, tag_start_byte)
2635            else {
2636                return None;
2637            };
2638
2639            if mdx_jsx_tag_matches(root.tag, candidate.tag) {
2640                if candidate.closing {
2641                    depth = depth.saturating_sub(1);
2642                    if depth == 0 {
2643                        return Some(tag_end_line);
2644                    }
2645                } else if !candidate_self_closing {
2646                    depth += 1;
2647                }
2648            }
2649
2650            cursor_byte = tag_end_byte + 1;
2651            if tag_end_line != cursor_line {
2652                cursor_line = tag_end_line;
2653                continue 'scan;
2654            }
2655        }
2656        cursor_line += 1;
2657        cursor_byte = 0;
2658    }
2659    None
2660}
2661
2662fn parse_mdx_jsx_inline(input: &str, index: usize) -> Option<(usize, String)> {
2663    let root = mdx_jsx_tag_start(input, index)?;
2664    if root.closing {
2665        return None;
2666    }
2667
2668    let (mut cursor, self_closing) = find_mdx_jsx_tag_end_in_text(input, index)?;
2669    if self_closing {
2670        let end = cursor + 1;
2671        return Some((end, input[index..end].into()));
2672    }
2673
2674    let mut depth = 1usize;
2675    cursor += 1;
2676    while cursor < input.len() {
2677        let Some(relative_start) = input[cursor..].find('<') else {
2678            return None;
2679        };
2680        let tag_start_byte = cursor + relative_start;
2681        let Some(candidate) = mdx_jsx_tag_start(input, tag_start_byte) else {
2682            cursor = tag_start_byte + 1;
2683            continue;
2684        };
2685        let Some((tag_end, candidate_self_closing)) =
2686            find_mdx_jsx_tag_end_in_text(input, tag_start_byte)
2687        else {
2688            return None;
2689        };
2690
2691        if mdx_jsx_tag_matches(root.tag, candidate.tag) {
2692            if candidate.closing {
2693                depth = depth.saturating_sub(1);
2694                if depth == 0 {
2695                    let end = tag_end + 1;
2696                    return Some((end, input[index..end].into()));
2697                }
2698            } else if !candidate_self_closing {
2699                depth += 1;
2700            }
2701        }
2702        cursor = tag_end + 1;
2703    }
2704    None
2705}
2706
2707fn mdx_jsx_tag_start(input: &str, start: usize) -> Option<MdxJsxTagStart<'_>> {
2708    let bytes = input.as_bytes();
2709    if bytes.get(start) != Some(&b'<') {
2710        return None;
2711    }
2712
2713    match bytes.get(start + 1) {
2714        Some(b'>') => {
2715            return Some(MdxJsxTagStart {
2716                tag: MdxJsxTag::Fragment,
2717                closing: false,
2718            });
2719        }
2720        Some(b'/') if bytes.get(start + 2) == Some(&b'>') => {
2721            return Some(MdxJsxTagStart {
2722                tag: MdxJsxTag::Fragment,
2723                closing: true,
2724            });
2725        }
2726        Some(b'!' | b'?') | None => return None,
2727        _ => {}
2728    }
2729
2730    let closing = bytes.get(start + 1) == Some(&b'/');
2731    let name_start = start + if closing { 2 } else { 1 };
2732    if !bytes
2733        .get(name_start)
2734        .is_some_and(|byte| is_mdx_jsx_name_start_byte(*byte))
2735    {
2736        return None;
2737    }
2738
2739    let mut name_end = name_start + 1;
2740    while bytes
2741        .get(name_end)
2742        .is_some_and(|byte| is_mdx_jsx_name_byte(*byte))
2743    {
2744        name_end += 1;
2745    }
2746    if name_end == name_start {
2747        return None;
2748    }
2749    if bytes
2750        .get(name_end)
2751        .is_some_and(|byte| !is_mdx_jsx_name_delimiter(*byte))
2752    {
2753        return None;
2754    }
2755    Some(MdxJsxTagStart {
2756        tag: MdxJsxTag::Named(&input[name_start..name_end]),
2757        closing,
2758    })
2759}
2760
2761fn mdx_jsx_tag_matches(left: MdxJsxTag<'_>, right: MdxJsxTag<'_>) -> bool {
2762    match (left, right) {
2763        (MdxJsxTag::Fragment, MdxJsxTag::Fragment) => true,
2764        (MdxJsxTag::Named(left), MdxJsxTag::Named(right)) => left == right,
2765        _ => false,
2766    }
2767}
2768
2769fn find_mdx_jsx_tag_end(
2770    lines: &[Line<'_>],
2771    start_line: usize,
2772    start_byte: usize,
2773) -> Option<(usize, usize, bool)> {
2774    let mut line_index = start_line;
2775    let mut byte_index = start_byte + 1;
2776    let mut quote = None;
2777    let mut escaped = false;
2778    let mut expression_depth = 0usize;
2779    let mut expression_state = MdxBraceState::Normal;
2780    let mut expression_escaped = false;
2781
2782    while line_index < lines.len() {
2783        let bytes = lines[line_index].text.as_bytes();
2784        while byte_index < bytes.len() {
2785            let byte = bytes[byte_index];
2786            if expression_depth > 0 {
2787                if update_mdx_jsx_expression_state(
2788                    byte,
2789                    bytes.get(byte_index + 1).copied(),
2790                    &mut expression_depth,
2791                    &mut expression_state,
2792                    &mut expression_escaped,
2793                ) {
2794                    byte_index += 1;
2795                }
2796                byte_index += 1;
2797                continue;
2798            }
2799
2800            if let Some(delimiter) = quote {
2801                if escaped {
2802                    escaped = false;
2803                } else if byte == b'\\' {
2804                    escaped = true;
2805                } else if byte == delimiter {
2806                    quote = None;
2807                }
2808                byte_index += 1;
2809                continue;
2810            }
2811
2812            match byte {
2813                b'\'' | b'"' => quote = Some(byte),
2814                b'{' => {
2815                    expression_depth = 1;
2816                    expression_state = MdxBraceState::Normal;
2817                    expression_escaped = false;
2818                }
2819                b'>' if expression_depth == 0 => {
2820                    let self_closing =
2821                        previous_nonspace_before(lines, line_index, byte_index) == Some(b'/');
2822                    return Some((line_index, byte_index, self_closing));
2823                }
2824                _ => {}
2825            }
2826            byte_index += 1;
2827        }
2828        if expression_state == MdxBraceState::LineComment {
2829            expression_state = MdxBraceState::Normal;
2830        }
2831        line_index += 1;
2832        byte_index = 0;
2833    }
2834    None
2835}
2836
2837fn previous_nonspace_before(
2838    lines: &[Line<'_>],
2839    line_index: usize,
2840    byte_index: usize,
2841) -> Option<u8> {
2842    let mut cursor_line = line_index;
2843    let mut cursor_byte = byte_index;
2844
2845    loop {
2846        if let Some(byte) = lines[cursor_line].text.as_bytes()[..cursor_byte]
2847            .iter()
2848            .rev()
2849            .copied()
2850            .find(|byte| !byte.is_ascii_whitespace())
2851        {
2852            return Some(byte);
2853        }
2854        if cursor_line == 0 {
2855            return None;
2856        }
2857        cursor_line -= 1;
2858        cursor_byte = lines[cursor_line].text.len();
2859    }
2860}
2861
2862fn find_mdx_jsx_tag_end_in_text(input: &str, start_byte: usize) -> Option<(usize, bool)> {
2863    let bytes = input.as_bytes();
2864    let mut byte_index = start_byte + 1;
2865    let mut quote = None;
2866    let mut escaped = false;
2867    let mut expression_depth = 0usize;
2868    let mut expression_state = MdxBraceState::Normal;
2869    let mut expression_escaped = false;
2870
2871    while byte_index < bytes.len() {
2872        let byte = bytes[byte_index];
2873        if expression_depth > 0 {
2874            if update_mdx_jsx_expression_state(
2875                byte,
2876                bytes.get(byte_index + 1).copied(),
2877                &mut expression_depth,
2878                &mut expression_state,
2879                &mut expression_escaped,
2880            ) {
2881                byte_index += 1;
2882            }
2883            byte_index += 1;
2884            continue;
2885        }
2886
2887        if let Some(delimiter) = quote {
2888            if escaped {
2889                escaped = false;
2890            } else if byte == b'\\' {
2891                escaped = true;
2892            } else if byte == delimiter {
2893                quote = None;
2894            }
2895            byte_index += 1;
2896            continue;
2897        }
2898
2899        match byte {
2900            b'\'' | b'"' => quote = Some(byte),
2901            b'{' => {
2902                expression_depth = 1;
2903                expression_state = MdxBraceState::Normal;
2904                expression_escaped = false;
2905            }
2906            b'>' if expression_depth == 0 => {
2907                let self_closing = previous_nonspace_before_text(input, byte_index) == Some(b'/');
2908                return Some((byte_index, self_closing));
2909            }
2910            _ => {}
2911        }
2912        byte_index += 1;
2913    }
2914    None
2915}
2916
/// Returns the last byte of `input[..byte_index]` that is not ASCII
/// whitespace, or `None` when that prefix is empty or all whitespace.
///
/// Panics if `byte_index` exceeds the length of `input`.
fn previous_nonspace_before_text(input: &str, byte_index: usize) -> Option<u8> {
    let prefix = &input.as_bytes()[..byte_index];
    prefix
        .iter()
        .rposition(|byte| !byte.is_ascii_whitespace())
        .map(|at| prefix[at])
}
2924
2925fn update_mdx_jsx_expression_state(
2926    byte: u8,
2927    next: Option<u8>,
2928    depth: &mut usize,
2929    state: &mut MdxBraceState,
2930    escaped: &mut bool,
2931) -> bool {
2932    match *state {
2933        MdxBraceState::Normal => match byte {
2934            b'\'' => *state = MdxBraceState::SingleQuoted,
2935            b'"' => *state = MdxBraceState::DoubleQuoted,
2936            b'`' => *state = MdxBraceState::Template,
2937            b'/' if next == Some(b'/') => {
2938                *state = MdxBraceState::LineComment;
2939                return true;
2940            }
2941            b'/' if next == Some(b'*') => {
2942                *state = MdxBraceState::BlockComment;
2943                return true;
2944            }
2945            b'{' => *depth += 1,
2946            b'}' => {
2947                *depth = (*depth).saturating_sub(1);
2948                if *depth == 0 {
2949                    *state = MdxBraceState::Normal;
2950                    *escaped = false;
2951                }
2952            }
2953            _ => {}
2954        },
2955        MdxBraceState::SingleQuoted => {
2956            update_mdx_quote_state(byte, b'\'', state, escaped);
2957        }
2958        MdxBraceState::DoubleQuoted => {
2959            update_mdx_quote_state(byte, b'"', state, escaped);
2960        }
2961        MdxBraceState::Template => {
2962            update_mdx_quote_state(byte, b'`', state, escaped);
2963        }
2964        MdxBraceState::LineComment => {
2965            if byte == b'\n' {
2966                *state = MdxBraceState::Normal;
2967            }
2968        }
2969        MdxBraceState::BlockComment => {
2970            if byte == b'*' && next == Some(b'/') {
2971                *state = MdxBraceState::Normal;
2972                return true;
2973            }
2974        }
2975    }
2976    false
2977}
2978
/// True for bytes that may begin a JSX tag name: ASCII letters, `_` and `$`.
fn is_mdx_jsx_name_start_byte(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'$')
}
2982
/// True for bytes that may continue a JSX tag name: ASCII letters and digits
/// plus `.`, `:`, `_`, `-` and `$`.
fn is_mdx_jsx_name_byte(byte: u8) -> bool {
    matches!(
        byte,
        b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'.' | b':' | b'_' | b'-' | b'$'
    )
}
2986
/// True for bytes that may directly follow a JSX tag name: ASCII whitespace
/// (space, tab, line feed, form feed, carriage return) and `/`, `>`, `{`, `}`.
fn is_mdx_jsx_name_delimiter(byte: u8) -> bool {
    matches!(
        byte,
        b' ' | b'\t' | b'\n' | b'\x0C' | b'\r' | b'/' | b'>' | b'{' | b'}'
    )
}
2990
2991fn collect_line_range(lines: &[Line<'_>], start: usize, end: usize) -> String {
2992    let mut value = String::new();
2993    let mut cursor = start;
2994    while cursor <= end {
2995        if cursor > start {
2996            value.push('\n');
2997        }
2998        value.push_str(lines[cursor].text);
2999        cursor += 1;
3000    }
3001    value
3002}
3003
3004fn parse_indented_code(
3005    lines: &[Line<'_>],
3006    index: usize,
3007    options: &SyntaxOptions,
3008) -> Option<(Block, usize)> {
3009    if !options.constructs.indented_code || strip_indented_code_prefix(lines[index].text).is_none()
3010    {
3011        return None;
3012    }
3013    let mut value = String::new();
3014    let mut cursor = index;
3015    // Track the last line that carried real content: leading and trailing blank
3016    // lines are not part of an indented code block, only interior ones are.
3017    let mut content_end = index;
3018    let mut content_end_len = 0usize;
3019    while cursor < lines.len() {
3020        if let Some(text) = strip_indented_code_prefix(lines[cursor].text) {
3021            ensure_line_separator(&mut value);
3022            value.push_str(text);
3023            value.push_str(lines[cursor].eol);
3024            if !text.trim().is_empty() {
3025                content_end = cursor;
3026                content_end_len = value.len();
3027            }
3028            cursor += 1;
3029            continue;
3030        }
3031
3032        if !lines[cursor].text.trim().is_empty() {
3033            break;
3034        }
3035        ensure_line_separator(&mut value);
3036        value.push_str(lines[cursor].eol);
3037        cursor += 1;
3038    }
3039    // Drop trailing blank lines accumulated past the last real content line.
3040    value.truncate(content_end_len);
3041    Some((
3042        Block::CodeBlock(CodeBlock {
3043            meta: NodeMeta::new(Some(Span::new(
3044                lines[index].start,
3045                lines[content_end].end_with_eol,
3046            ))),
3047            kind: CodeBlockKind::Indented,
3048            info: None,
3049            value,
3050        }),
3051        cursor,
3052    ))
3053}
3054
/// Removes a four-column indent from the start of `input`.
///
/// Spaces advance one column and a tab advances to the next multiple of four.
/// Returns the text after the byte that reaches column four, or `None` when a
/// non-indent byte or the end of the input comes first.
fn strip_indented_code_prefix(input: &str) -> Option<&str> {
    let mut width = 0usize;
    for (offset, byte) in input.bytes().enumerate() {
        width = match byte {
            b' ' => width + 1,
            b'\t' => (width / 4 + 1) * 4,
            _ => return None,
        };
        if width >= 4 {
            return Some(&input[offset + 1..]);
        }
    }
    None
}
3076
3077fn parse_table(
3078    lines: &[Line<'_>],
3079    index: usize,
3080    options: &SyntaxOptions,
3081    definitions: &[String],
3082    diagnostics: &mut Vec<Diagnostic>,
3083) -> Option<(Block, usize)> {
3084    if !options.constructs.gfm_table || index + 1 >= lines.len() {
3085        return None;
3086    }
3087    let delimiter = table_indent_line(lines[index + 1].text, options.constructs.indented_code)?;
3088    if list_marker_info(delimiter).is_some() {
3089        return None;
3090    }
3091    if !table_has_separator(lines[index].text, delimiter, options.constructs.spoiler) {
3092        return None;
3093    }
3094    let alignments = parse_table_delimiter(delimiter, options.constructs.spoiler)?;
3095    let headers = split_table_row(lines[index].text, options.constructs.spoiler);
3096    if headers.len() != alignments.len() {
3097        return None;
3098    }
3099
3100    let mut rows = Vec::new();
3101    rows.push(TableRow {
3102        meta: NodeMeta::new(Some(Span::new(lines[index].start, lines[index].end))),
3103        cells: headers
3104            .iter()
3105            .map(|cell| TableCell {
3106                meta: NodeMeta::default(),
3107                children: parse_inlines(
3108                    cell.trim(),
3109                    lines[index].start,
3110                    options,
3111                    definitions,
3112                    diagnostics,
3113                ),
3114            })
3115            .collect(),
3116    });
3117
3118    let mut cursor = index + 2;
3119    while cursor < lines.len() {
3120        let Some(row) = table_indent_line(lines[cursor].text, options.constructs.indented_code)
3121        else {
3122            break;
3123        };
3124        // Once a table is open, every non-blank line that isn't a real block
3125        // start is a body row (GFM); pipeless lines (incl. setext underlines)
3126        // become a single padded cell.
3127        if row.trim().is_empty() || table_body_line_ends_table(lines[cursor].text, options) {
3128            break;
3129        }
3130        let cells = split_table_row(row, options.constructs.spoiler);
3131        rows.push(TableRow {
3132            meta: NodeMeta::new(Some(Span::new(lines[cursor].start, lines[cursor].end))),
3133            cells: alignments
3134                .iter()
3135                .enumerate()
3136                .map(|(cell_index, _)| {
3137                    let value = cells.get(cell_index).map(String::as_str).unwrap_or("");
3138                    TableCell {
3139                        meta: NodeMeta::default(),
3140                        children: parse_inlines(
3141                            value.trim(),
3142                            lines[cursor].start,
3143                            options,
3144                            definitions,
3145                            diagnostics,
3146                        ),
3147                    }
3148                })
3149                .collect(),
3150        });
3151        cursor += 1;
3152    }
3153
3154    Some((
3155        Block::Table(Table {
3156            meta: NodeMeta::new(Some(Span::new(
3157                lines[index].start,
3158                lines[cursor - 1].end_with_eol,
3159            ))),
3160            alignments,
3161            rows,
3162        }),
3163        cursor,
3164    ))
3165}
3166
3167fn parse_setext_heading(
3168    lines: &[Line<'_>],
3169    index: usize,
3170    options: &SyntaxOptions,
3171    definitions: &[String],
3172) -> Option<(Block, usize)> {
3173    if index + 1 >= lines.len() || lines[index].text.trim().is_empty() {
3174        return None;
3175    }
3176
3177    // A setext heading is a (possibly multi-line) paragraph followed by an
3178    // underline. Scan over paragraph-continuation lines to find the underline,
3179    // stopping if a continuation line is itself a block start (which would
3180    // interrupt the paragraph before any underline could apply).
3181    let mut underline_index = index + 1;
3182    loop {
3183        // A setext underline that arrived as a LAZY block-quote continuation is
3184        // paragraph text, not an underline: `> a\n===` is `<p>a\n===</p>`, while
3185        // a MARKED `> a\n> ---` stays an H2 (its `---` is not lazy). The lazy
3186        // flag distinguishes the two; a lazy underline keeps scanning as
3187        // ordinary paragraph-continuation text.
3188        let underline_depth = if lines[underline_index].lazy {
3189            None
3190        } else {
3191            setext_underline_depth(lines[underline_index].text)
3192        };
3193        if let Some(depth) = underline_depth {
3194            let mut value = String::new();
3195            for line in &lines[index..underline_index] {
3196                // Trim leading indentation only: a fully `.trim()`ed content line
3197                // would discard the trailing spaces that form a hard line break.
3198                push_line(&mut value, trim_ascii_start(line.text));
3199            }
3200            return Some((
3201                Block::Heading(Heading {
3202                    meta: NodeMeta::new(Some(Span::new(
3203                        lines[index].start,
3204                        lines[underline_index].end,
3205                    ))),
3206                    depth,
3207                    kind: HeadingKind::Setext,
3208                    children: parse_inlines(
3209                        &value,
3210                        lines[index].start,
3211                        options,
3212                        definitions,
3213                        &mut Vec::new(),
3214                    ),
3215                }),
3216                underline_index + 1,
3217            ));
3218        }
3219
3220        // Not an underline: it must be a valid paragraph-continuation line for
3221        // the run to remain a setext heading.
3222        let line = lines[underline_index].text;
3223        if line.trim().is_empty()
3224            || table_can_start(lines, underline_index, options)
3225            || likely_block_start(line, options)
3226        {
3227            return None;
3228        }
3229        underline_index += 1;
3230        if underline_index >= lines.len() {
3231            return None;
3232        }
3233    }
3234}
3235
3236fn setext_underline_depth(input: &str) -> Option<u8> {
3237    let underline = trim_up_to_three_spaces(input)?.trim();
3238    match underline {
3239        text if !text.is_empty() && text.chars().all(|char| char == '=') => Some(1),
3240        text if !text.is_empty() && text.chars().all(|char| char == '-') => Some(2),
3241        _ => None,
3242    }
3243}
3244
3245fn parse_paragraph(
3246    lines: &[Line<'_>],
3247    index: usize,
3248    options: &SyntaxOptions,
3249    definitions: &[String],
3250    diagnostics: &mut Vec<Diagnostic>,
3251) -> (Block, usize) {
3252    let mut value = String::new();
3253    let start = lines[index].start;
3254    let mut cursor = index;
3255    while cursor < lines.len() {
3256        if lines[cursor].text.trim().is_empty() {
3257            break;
3258        }
3259        // A lazy continuation line is paragraph text by construction (it reached
3260        // this paragraph as the dedented tail of an enclosing container), so it
3261        // cannot itself start a new block — skip the block-boundary checks.
3262        if cursor > index && !lines[cursor].lazy {
3263            if table_can_start(lines, cursor, options) {
3264                break;
3265            }
3266            if likely_block_start(lines[cursor].text, options) {
3267                break;
3268            }
3269        }
3270        if !value.is_empty() {
3271            value.push('\n');
3272        }
3273        value.push_str(trim_ascii_start(lines[cursor].text));
3274        cursor += 1;
3275    }
3276
3277    let end = lines[cursor - 1].end;
3278    (
3279        Block::Paragraph(Paragraph {
3280            meta: NodeMeta::new(Some(Span::new(start, end))),
3281            children: parse_inlines(&value, start, options, definitions, diagnostics),
3282        }),
3283        cursor,
3284    )
3285}
3286
/// One run of `*`, `_` or `~` delimiter characters noted during the inline
/// scan, kept so the CommonMark delimiter-stack pass (`process_emphasis`) can
/// pair it with a partner later.
#[derive(Copy, Clone)]
struct DelimMarker {
    /// Position in the flat node list of the placeholder text node. That node
    /// carries the delimiter characters not yet matched; a match trims it on
    /// the relevant side, and matched characters are removed altogether.
    node_index: usize,
    /// The delimiter byte the run consists of.
    marker: u8,
    /// How many delimiter characters of the run are still unmatched.
    length: usize,
    /// Whether the run may open a span.
    can_open: bool,
    /// Whether the run may close a span.
    can_close: bool,
    /// Absolute byte offset of the first delimiter character still remaining.
    span_start: usize,
    /// Set once the run is fully matched or has been demoted to plain text.
    inactive: bool,
}
3305
3306/// Records a `*`/`_`/`~` delimiter run as a literal text node plus a stack
3307/// entry.
3308///
3309/// Flanking is computed on the whole run (CommonMark treats left/right-flanking
3310/// as a property of the run, not of an individual delimiter), so the same
3311/// `can_open`/`can_close` helpers that the older ad-hoc scanner used are reused
3312/// here unchanged — including the `_` intraword punctuation rules.
3313///
3314/// `strikethrough` enables the GFM cross-marker bonus: when strikethrough is an
3315/// active construct, a `*`/`_` run immediately adjacent to a `~` counts as
3316/// openable/closeable even though `~` is a punctuation character (this is what
3317/// makes `a*~b~*c` emphasize). The bonus is never granted to a `~` run itself —
3318/// tilde gets plain CommonMark flanking.
3319fn record_emphasis_delimiter(
3320    nodes: &mut Vec<Inline>,
3321    delimiters: &mut Vec<DelimMarker>,
3322    input: &str,
3323    index: usize,
3324    base_offset: usize,
3325    marker: u8,
3326    strikethrough: bool,
3327) {
3328    let length = delimiter_byte_run_len(input, index, marker);
3329    let (mut can_open, mut can_close) = if marker == b'_' {
3330        (
3331            can_open_underscore(input, index, length),
3332            can_close_underscore(input, index, length),
3333        )
3334    } else {
3335        (
3336            can_open_delimited(input, index, length),
3337            can_close_delimited(input, index, length),
3338        )
3339    };
3340
3341    // GFM: a `*`/`_` run touching a `~` strikethrough marker may open/close even
3342    // when ordinary flanking refuses it (the `~` would otherwise be a blocking
3343    // punctuation neighbour). Tilde itself never receives this bonus.
3344    if strikethrough && marker != b'~' {
3345        let before = input[..index].chars().next_back();
3346        let after = input[index + length..].chars().next();
3347        if after == Some('~') {
3348            can_open = true;
3349        }
3350        if before == Some('~') {
3351            can_close = true;
3352        }
3353    }
3354
3355    let value = String::from(marker as char).repeat(length);
3356
3357    let node_index = nodes.len();
3358    nodes.push(Inline::Text(Text {
3359        meta: NodeMeta::new(Some(Span::new(
3360            base_offset + index,
3361            base_offset + index + length,
3362        ))),
3363        value,
3364    }));
3365
3366    delimiters.push(DelimMarker {
3367        node_index,
3368        marker,
3369        length,
3370        can_open,
3371        can_close,
3372        span_start: base_offset + index,
3373        inactive: false,
3374    });
3375}
3376
3377/// Resolves recorded `*`/`_` delimiter runs into `Emphasis`/`Strong` nodes using
3378/// the CommonMark delimiter-stack algorithm, leaving unmatched runs as text.
3379fn process_emphasis(mut nodes: Vec<Inline>, mut delimiters: Vec<DelimMarker>) -> Vec<Inline> {
3380    if delimiters.is_empty() {
3381        return nodes;
3382    }
3383
3384    // `openers_bottom` records, per (marker, opener-can-also-close, length % 3),
3385    // the lowest opener index a closer is allowed to reach. Closers below this
3386    // bound for their key have already been proven to have no compatible opener.
3387    // Three markers (`*`, `_`, `~`) × both-flag × length%3.
3388    let mut openers_bottom: [Option<usize>; 18] = [None; 18];
3389    let mut closer_idx = 0;
3390
3391    while closer_idx < delimiters.len() {
3392        let closer = delimiters[closer_idx];
3393        if closer.inactive || !closer.can_close {
3394            closer_idx += 1;
3395            continue;
3396        }
3397
3398        let key = openers_bottom_key(&closer);
3399        let bottom = openers_bottom[key];
3400
3401        // Walk back to the nearest compatible opener above the recorded bound.
3402        let mut opener_idx = None;
3403        let mut search = closer_idx;
3404        while search > 0 {
3405            search -= 1;
3406            if let Some(bottom) = bottom {
3407                if search < bottom {
3408                    break;
3409                }
3410            }
3411            let candidate = delimiters[search];
3412            if candidate.inactive || candidate.marker != closer.marker || !candidate.can_open {
3413                continue;
3414            }
3415            if emphasis_delimiters_match(&candidate, &closer) {
3416                opener_idx = Some(search);
3417                break;
3418            }
3419        }
3420
3421        let Some(opener_idx) = opener_idx else {
3422            // No opener found: remember how far we searched so future closers of
3423            // the same key skip the same dead range. A closer that cannot also
3424            // open is removed so it is never revisited.
3425            openers_bottom[key] = Some(closer_idx);
3426            if !closer.can_open {
3427                delimiters[closer_idx].inactive = true;
3428            }
3429            closer_idx += 1;
3430            continue;
3431        };
3432
3433        let (used, wrap) = if closer.marker == b'~' {
3434            // Strikethrough consumes the whole (equal-length) run on each side at
3435            // once; the marker width selects the `Delete` flavour.
3436            let length = delimiters[closer_idx].length;
3437            let marker = if length >= 2 {
3438                DeleteMarker::DoubleTilde
3439            } else {
3440                DeleteMarker::SingleTilde
3441            };
3442            (length, EmphasisWrap::Delete(marker))
3443        } else {
3444            let strong = delimiters[opener_idx].length >= 2 && delimiters[closer_idx].length >= 2;
3445            let used = if strong { 2 } else { 1 };
3446            let wrap = if strong {
3447                EmphasisWrap::Strong
3448            } else {
3449                EmphasisWrap::Emphasis
3450            };
3451            (used, wrap)
3452        };
3453
3454        apply_emphasis(
3455            &mut nodes,
3456            &mut delimiters,
3457            opener_idx,
3458            closer_idx,
3459            used,
3460            wrap,
3461        );
3462
3463        // Drop delimiters strictly between the opener and closer: they could not
3464        // match outward across this newly closed span.
3465        let mut inner = opener_idx + 1;
3466        while inner < closer_idx {
3467            delimiters[inner].inactive = true;
3468            inner += 1;
3469        }
3470
3471        if delimiters[opener_idx].length == 0 {
3472            delimiters[opener_idx].inactive = true;
3473        }
3474        if delimiters[closer_idx].length == 0 {
3475            delimiters[closer_idx].inactive = true;
3476            closer_idx += 1;
3477        }
3478        // When the closer still has delimiters left it stays the active closer so
3479        // the leftover can match an earlier opener (e.g. `***foo*` keeps `**`).
3480    }
3481
3482    // Adjacent text nodes can appear where unmatched delimiter runs ended up
3483    // beside literal text (`**foo*bar*` -> `**foo` + emphasis). CommonMark
3484    // coalesces them as the final step; do the same for the spans we created.
3485    merge_adjacent_text(&mut nodes);
3486    nodes
3487}
3488
3489/// Merges consecutive `Text` nodes in a list, recursing into the `Emphasis`/
3490/// `Strong` nodes produced at this level. Other containers were already
3491/// finalized by their own `parse_inlines` pass and are left untouched.
3492fn merge_adjacent_text(nodes: &mut Vec<Inline>) {
3493    let mut write = 0;
3494    for read in 0..nodes.len() {
3495        if read != write {
3496            nodes.swap(read, write);
3497        }
3498        if write > 0 {
3499            let (head, tail) = nodes.split_at_mut(write);
3500            if let (Inline::Text(previous), Inline::Text(current)) =
3501                (&mut head[write - 1], &tail[0])
3502            {
3503                previous.value.push_str(&current.value);
3504                if let (Some(previous_span), Some(current_span)) =
3505                    (previous.meta.span.as_mut(), current.meta.span)
3506                {
3507                    previous_span.end = current_span.end;
3508                }
3509                continue;
3510            }
3511        }
3512        write += 1;
3513    }
3514    nodes.truncate(write);
3515
3516    for node in nodes.iter_mut() {
3517        match node {
3518            Inline::Emphasis(emphasis) => merge_adjacent_text(&mut emphasis.children),
3519            Inline::Strong(strong) => merge_adjacent_text(&mut strong.children),
3520            Inline::Delete(delete) => merge_adjacent_text(&mut delete.children),
3521            _ => {}
3522        }
3523    }
3524}
3525
3526/// Index into `openers_bottom` for a closer's (marker, both-flags, length%3) key.
3527fn openers_bottom_key(closer: &DelimMarker) -> usize {
3528    let marker = match closer.marker {
3529        b'_' => 1,
3530        b'~' => 2,
3531        _ => 0,
3532    };
3533    let both = usize::from(closer.can_open && closer.can_close);
3534    let modulo = closer.length % 3;
3535    ((marker * 2) + both) * 3 + modulo
3536}
3537
3538/// CommonMark opener/closer compatibility, including the rule of three.
3539fn emphasis_delimiters_match(opener: &DelimMarker, closer: &DelimMarker) -> bool {
3540    // GFM strikethrough: opener and closer runs must be the same length (a `~`
3541    // never pairs with `~~`). The rule of three does not apply to `~`.
3542    if opener.marker == b'~' {
3543        return opener.length == closer.length;
3544    }
3545
3546    // Rule of three: if either delimiter can both open and close, the sum of the
3547    // two run lengths must not be a multiple of three, unless both lengths are
3548    // themselves multiples of three.
3549    let opener_both = opener.can_open && opener.can_close;
3550    let closer_both = closer.can_open && closer.can_close;
3551    if opener_both || closer_both {
3552        let sum = opener.length + closer.length;
3553        if sum % 3 == 0 && !(opener.length % 3 == 0 && closer.length % 3 == 0) {
3554            return false;
3555        }
3556    }
3557    true
3558}
3559
/// The node a matched delimiter pair collapses into.
///
/// Consumed by `apply_emphasis`, which maps each variant to the corresponding
/// `Inline` node when wrapping the nodes between an opener and a closer.
#[derive(Clone, Copy)]
enum EmphasisWrap {
    /// Wrap the children in an `Inline::Emphasis` node.
    Emphasis,
    /// Wrap the children in an `Inline::Strong` node.
    Strong,
    /// Wrap the children in an `Inline::Delete` node carrying the given
    /// tilde-marker flavour.
    Delete(DeleteMarker),
}
3567
3568/// Wraps the nodes between two delimiter runs into an `Emphasis`/`Strong`/
3569/// `Delete` node, consuming `used` characters from each side and keeping every
3570/// other delimiter's `node_index` consistent with the rewritten node list.
3571fn apply_emphasis(
3572    nodes: &mut Vec<Inline>,
3573    delimiters: &mut [DelimMarker],
3574    opener_idx: usize,
3575    closer_idx: usize,
3576    used: usize,
3577    wrap: EmphasisWrap,
3578) {
3579    let opener_node = delimiters[opener_idx].node_index;
3580    let closer_node = delimiters[closer_idx].node_index;
3581
3582    // Trim the consumed characters from the opener's text node (right side) and
3583    // the closer's text node (left side), updating their recorded lengths/spans.
3584    trim_delimiter_text_tail(&mut nodes[opener_node], used);
3585    delimiters[opener_idx].length -= used;
3586    delimiters[opener_idx].span_start += used;
3587
3588    trim_delimiter_text_head(&mut nodes[closer_node], used);
3589    delimiters[closer_idx].length -= used;
3590
3591    // Span covers the consumed opener delimiters through the consumed closer
3592    // delimiters. The exact value is informational; structure is what matters.
3593    let span_start = delimiters[opener_idx].span_start - used;
3594    let span_end = delimiters[closer_idx].span_start + delimiters[closer_idx].length + used;
3595
3596    // The wrapped children are the nodes strictly between the opener and closer
3597    // text nodes.
3598    let children_start = opener_node + 1;
3599    let children_end = closer_node; // exclusive
3600    let children: Vec<Inline> = nodes.drain(children_start..children_end).collect();
3601    let removed = children.len();
3602
3603    let meta = NodeMeta::new(Some(Span::new(span_start, span_end)));
3604    let wrapped = match wrap {
3605        EmphasisWrap::Strong => Inline::Strong(Strong { meta, children }),
3606        EmphasisWrap::Emphasis => Inline::Emphasis(Emphasis { meta, children }),
3607        EmphasisWrap::Delete(marker) => Inline::Delete(Delete {
3608            meta,
3609            marker,
3610            children,
3611        }),
3612    };
3613    nodes.insert(children_start, wrapped);
3614
3615    // Indices at or past the (old) closer node shift by `1 - removed`: the drain
3616    // removed `removed` nodes then the insert added one. Apply this using the
3617    // original `children_end` threshold before any further mutation.
3618    reindex_delimiters(delimiters, children_end, 1 - removed as isize);
3619
3620    // Drop any placeholder text node that has been fully consumed so leftover
3621    // delimiters never survive as literal text. Remove the closer first because
3622    // it sits at the higher index and removal shifts everything after it.
3623    if delimiters[closer_idx].length == 0 {
3624        let pos = delimiters[closer_idx].node_index;
3625        nodes.remove(pos);
3626        reindex_delimiters(delimiters, pos, -1);
3627    }
3628    if delimiters[opener_idx].length == 0 {
3629        let pos = delimiters[opener_idx].node_index;
3630        nodes.remove(pos);
3631        reindex_delimiters(delimiters, pos, -1);
3632    }
3633}
3634
3635/// Adjusts `node_index` for every delimiter at or after `from` by `delta`.
3636fn reindex_delimiters(delimiters: &mut [DelimMarker], from: usize, delta: isize) {
3637    if delta == 0 {
3638        return;
3639    }
3640    for delimiter in delimiters.iter_mut() {
3641        if delimiter.node_index >= from {
3642            delimiter.node_index = (delimiter.node_index as isize + delta) as usize;
3643        }
3644    }
3645}
3646
3647/// Removes `count` trailing delimiter characters from a placeholder text node.
3648fn trim_delimiter_text_tail(node: &mut Inline, count: usize) {
3649    if let Inline::Text(text) = node {
3650        let new_len = text.value.len().saturating_sub(count);
3651        text.value.truncate(new_len);
3652        if let Some(span) = text.meta.span.as_mut() {
3653            span.end = span.end.saturating_sub(count);
3654        }
3655    }
3656}
3657
3658/// Removes `count` leading delimiter characters from a placeholder text node.
3659fn trim_delimiter_text_head(node: &mut Inline, count: usize) {
3660    if let Inline::Text(text) = node {
3661        let count = count.min(text.value.len());
3662        text.value.drain(..count);
3663        if let Some(span) = text.meta.span.as_mut() {
3664            span.start += count;
3665        }
3666    }
3667}
3668
3669fn parse_inlines(
3670    input: &str,
3671    base_offset: usize,
3672    options: &SyntaxOptions,
3673    definitions: &[String],
3674    diagnostics: &mut Vec<Diagnostic>,
3675) -> Vec<Inline> {
3676    parse_inlines_with_context(
3677        input,
3678        base_offset,
3679        options,
3680        definitions,
3681        diagnostics,
3682        InlineContext::default(),
3683    )
3684}
3685
/// Per-call state threaded through `parse_inlines_with_context`. Nested
/// constructs (spoiler, underline, insert, ...) pass it on unchanged to their
/// recursive inline parse.
#[derive(Clone, Copy)]
struct InlineContext {
    /// When `false`, the literal-autolink attempt at a leading `_` is skipped.
    /// The context is also handed to `parse_link` and to
    /// `gfm_link_label_preserves_url_dot_escape`.
    /// NOTE(review): presumably cleared while parsing link text so links do
    /// not nest; confirm against `parse_link`.
    allow_links: bool,
}

impl Default for InlineContext {
    /// The default context permits links.
    fn default() -> Self {
        Self { allow_links: true }
    }
}
3696
3697fn parse_inlines_with_context(
3698    input: &str,
3699    base_offset: usize,
3700    options: &SyntaxOptions,
3701    definitions: &[String],
3702    diagnostics: &mut Vec<Diagnostic>,
3703    context: InlineContext,
3704) -> Vec<Inline> {
3705    let bytes = input.as_bytes();
3706    let mut nodes = Vec::new();
3707    let mut text_start = 0;
3708    let mut text = String::new();
3709    let mut index = 0;
3710    // Core `*`/`_` emphasis is resolved with a CommonMark delimiter stack after
3711    // the scan completes. During the scan we emit each candidate delimiter run as
3712    // a literal text node and record its position here so `process_emphasis` can
3713    // rewrite the flat node list into Emphasis/Strong (or leave it as text).
3714    let mut delimiters: Vec<DelimMarker> = Vec::new();
3715
3716    while index < bytes.len() {
3717        if bytes[index] == b'\\' {
3718            if let Some((next_index, char)) = next_char(input, index + 1) {
3719                if char.is_ascii_punctuation() {
3720                    if options.parse.preserve_character_escapes {
3721                        flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3722                        nodes.push(Inline::Escape(Escape {
3723                            meta: NodeMeta::new(Some(Span::new(
3724                                base_offset + index,
3725                                base_offset + next_index,
3726                            ))),
3727                            value: char,
3728                        }));
3729                        index = next_index;
3730                        text_start = index;
3731                        continue;
3732                    }
3733                    if text.is_empty() {
3734                        text_start = base_offset + index;
3735                    }
3736                    if gfm_link_label_preserves_url_dot_escape(&text, char, options, context) {
3737                        text.push('\\');
3738                    }
3739                    text.push(char);
3740                    index = next_index;
3741                    continue;
3742                }
3743            }
3744        }
3745
3746        if bytes[index] == b'&' {
3747            if let Some((end, value)) = parse_character_reference(input, index) {
3748                if options.parse.preserve_character_references {
3749                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3750                    nodes.push(Inline::CharacterReference(CharacterReference {
3751                        meta: NodeMeta::new(Some(Span::new(
3752                            base_offset + index,
3753                            base_offset + end,
3754                        ))),
3755                        reference: input[index..end].into(),
3756                        value,
3757                    }));
3758                    index = end;
3759                    text_start = index;
3760                    continue;
3761                }
3762                if text.is_empty() {
3763                    text_start = base_offset + index;
3764                }
3765                text.push_str(&value);
3766                index = end;
3767                continue;
3768            }
3769        }
3770
3771        if bytes[index] == b'\n' {
3772            if text.ends_with('\\') {
3773                text.pop();
3774                flush_text(
3775                    &mut nodes,
3776                    &mut text,
3777                    text_start,
3778                    base_offset + index.saturating_sub(1),
3779                );
3780                nodes.push(Inline::LineBreak(LineBreak {
3781                    meta: NodeMeta::new(Some(Span::new(
3782                        base_offset + index.saturating_sub(1),
3783                        base_offset + index + 1,
3784                    ))),
3785                    kind: LineBreakKind::Backslash,
3786                }));
3787                index += 1;
3788                text_start = index;
3789                continue;
3790            }
3791            let trailing_spaces = trailing_space_count(&text);
3792            if is_hard_break_suffix(&text, trailing_spaces) {
3793                text.truncate(text.len() - trailing_spaces);
3794                flush_text(
3795                    &mut nodes,
3796                    &mut text,
3797                    text_start,
3798                    base_offset + index.saturating_sub(trailing_spaces),
3799                );
3800                nodes.push(Inline::LineBreak(LineBreak {
3801                    meta: NodeMeta::new(Some(Span::new(
3802                        base_offset + index.saturating_sub(trailing_spaces),
3803                        base_offset + index + 1,
3804                    ))),
3805                    kind: LineBreakKind::Spaces,
3806                }));
3807                index += 1;
3808                text_start = index;
3809                continue;
3810            }
3811            if trailing_spaces > 0 {
3812                text.truncate(text.len() - trailing_spaces);
3813            }
3814            flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3815            nodes.push(Inline::SoftBreak(SoftBreak {
3816                meta: NodeMeta::new(Some(Span::new(
3817                    base_offset + index,
3818                    base_offset + index + 1,
3819                ))),
3820            }));
3821            index += 1;
3822            text_start = index;
3823            continue;
3824        }
3825
3826        if bytes[index] == b'`' {
3827            if let Some((end, code_span)) = parse_code_span(input, index) {
3828                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3829                nodes.push(Inline::Code(CodeInline {
3830                    meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
3831                    value: code_span.value,
3832                    raw: code_span.raw,
3833                    fence_length: code_span.fence_length,
3834                }));
3835                index = end;
3836                text_start = index;
3837                continue;
3838            } else {
3839                // No matching-length close for this opening backtick run:
3840                // CommonMark renders the whole run as literal text. Consume the
3841                // entire run here so the loop does not advance one byte and retry
3842                // a shorter sub-run that could spuriously match a shorter close
3843                // (```foo`` stayed a phantom 2-backtick code span).
3844                let run = bytes[index..]
3845                    .iter()
3846                    .take_while(|byte| **byte == b'`')
3847                    .count();
3848                if text.is_empty() {
3849                    text_start = base_offset + index;
3850                }
3851                for _ in 0..run {
3852                    text.push('`');
3853                }
3854                index += run;
3855                continue;
3856            }
3857        }
3858
3859        if options.constructs.spoiler
3860            && bytes.get(index) == Some(&b'|')
3861            && bytes.get(index + 1) == Some(&b'|')
3862            && bytes.get(index + 2) != Some(&b'|')
3863        {
3864            if let Some(end) = find_spoiler_close(input, index + 2) {
3865                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3866                let inner = &input[index + 2..end];
3867                nodes.push(Inline::Spoiler(Spoiler {
3868                    meta: NodeMeta::new(Some(Span::new(
3869                        base_offset + index,
3870                        base_offset + end + 2,
3871                    ))),
3872                    children: parse_inlines_with_context(
3873                        inner,
3874                        base_offset + index + 2,
3875                        options,
3876                        definitions,
3877                        diagnostics,
3878                        context,
3879                    ),
3880                }));
3881                index = end + 2;
3882                text_start = index;
3883                continue;
3884            }
3885        }
3886
3887        if bytes[index] == b'*' && delimiter_byte_run_start(input, index, b'*') == index {
3888            let run_len = delimiter_byte_run_len(input, index, b'*');
3889            flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3890            record_emphasis_delimiter(
3891                &mut nodes,
3892                &mut delimiters,
3893                input,
3894                index,
3895                base_offset,
3896                b'*',
3897                options.constructs.gfm_strikethrough,
3898            );
3899            index += run_len;
3900            text_start = index;
3901            continue;
3902        }
3903
3904        if options.constructs.underline
3905            && bytes.get(index) == Some(&b'_')
3906            && bytes.get(index + 1) == Some(&b'_')
3907            && bytes.get(index + 2) == Some(&b'_')
3908            && can_open_underscore(input, index, 1)
3909        {
3910            if let Some(end) = find_closing_delimiter(input, index + 3, "___", true) {
3911                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3912                let inner = &input[index + 3..end];
3913                let underline = Inline::Underline(Underline {
3914                    meta: NodeMeta::new(Some(Span::new(
3915                        base_offset + index + 1,
3916                        base_offset + end + 2,
3917                    ))),
3918                    children: parse_inlines_with_context(
3919                        inner,
3920                        base_offset + index + 3,
3921                        options,
3922                        definitions,
3923                        diagnostics,
3924                        context,
3925                    ),
3926                });
3927                nodes.push(Inline::Emphasis(Emphasis {
3928                    meta: NodeMeta::new(Some(Span::new(
3929                        base_offset + index,
3930                        base_offset + end + 3,
3931                    ))),
3932                    children: vec![underline],
3933                }));
3934                index = end + 3;
3935                text_start = index;
3936                continue;
3937            }
3938        }
3939
3940        if options.constructs.underline
3941            && bytes.get(index) == Some(&b'_')
3942            && bytes.get(index + 1) == Some(&b'_')
3943            && can_open_underscore(input, index, 2)
3944        {
3945            if let Some(end) = find_closing_delimiter(input, index + 2, "__", true) {
3946                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3947                let inner = &input[index + 2..end];
3948                nodes.push(Inline::Underline(Underline {
3949                    meta: NodeMeta::new(Some(Span::new(
3950                        base_offset + index,
3951                        base_offset + end + 2,
3952                    ))),
3953                    children: parse_inlines_with_context(
3954                        inner,
3955                        base_offset + index + 2,
3956                        options,
3957                        definitions,
3958                        diagnostics,
3959                        context,
3960                    ),
3961                }));
3962                index = end + 2;
3963                text_start = index;
3964                continue;
3965            }
3966        }
3967
3968        // Core `_` emphasis/strong is resolved by the delimiter stack, just like
3969        // `*`. The `___`/`__` underline-extension branches above run first and
3970        // `continue` when they consume the run, so reaching this point means the
3971        // run is plain emphasis material (underline disabled, or no underline
3972        // close was found).
3973        if bytes[index] == b'_' && delimiter_byte_run_start(input, index, b'_') == index {
3974            // A leading `_` can begin a GFM email local part (`_a@b.c`); try the
3975            // literal autolink before recording the `_` as an emphasis
3976            // delimiter, otherwise the `_` would be consumed and the email would
3977            // wrongly start one char later (where its left boundary fails).
3978            if (options.constructs.gfm_autolink_literal || options.constructs.relaxed_autolinks)
3979                && context.allow_links
3980            {
3981                if let Some((end, destination)) = parse_literal_autolink(
3982                    input,
3983                    index,
3984                    options.constructs.gfm_autolink_literal,
3985                    options.constructs.relaxed_autolinks,
3986                ) {
3987                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
3988                    nodes.push(Inline::Autolink(Autolink {
3989                        meta: NodeMeta::new(Some(Span::new(
3990                            base_offset + index,
3991                            base_offset + end,
3992                        ))),
3993                        destination,
3994                        kind: AutolinkKind::GfmLiteral {
3995                            original: input[index..end].into(),
3996                        },
3997                    }));
3998                    index = end;
3999                    text_start = index;
4000                    continue;
4001                }
4002            }
4003            let run_len = delimiter_byte_run_len(input, index, b'_');
4004            flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4005            record_emphasis_delimiter(
4006                &mut nodes,
4007                &mut delimiters,
4008                input,
4009                index,
4010                base_offset,
4011                b'_',
4012                options.constructs.gfm_strikethrough,
4013            );
4014            index += run_len;
4015            text_start = index;
4016            continue;
4017        }
4018
4019        if options.constructs.insert
4020            && bytes.get(index) == Some(&b'+')
4021            && bytes.get(index + 1) == Some(&b'+')
4022            && bytes.get(index + 2) != Some(&b'+')
4023            && can_open_delimited(input, index, 2)
4024        {
4025            if let Some(end) = find_closing_delimiter(input, index + 2, "++", false) {
4026                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4027                let inner = &input[index + 2..end];
4028                nodes.push(Inline::Insert(Insert {
4029                    meta: NodeMeta::new(Some(Span::new(
4030                        base_offset + index,
4031                        base_offset + end + 2,
4032                    ))),
4033                    children: parse_inlines_with_context(
4034                        inner,
4035                        base_offset + index + 2,
4036                        options,
4037                        definitions,
4038                        diagnostics,
4039                        context,
4040                    ),
4041                }));
4042                index = end + 2;
4043                text_start = index;
4044                continue;
4045            }
4046        }
4047
4048        if options.constructs.highlight
4049            && bytes.get(index) == Some(&b'=')
4050            && bytes.get(index + 1) == Some(&b'=')
4051            && bytes.get(index + 2) != Some(&b'=')
4052            && can_open_delimited(input, index, 2)
4053        {
4054            if let Some(end) = find_closing_delimiter(input, index + 2, "==", false) {
4055                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4056                let inner = &input[index + 2..end];
4057                nodes.push(Inline::Mark(Mark {
4058                    meta: NodeMeta::new(Some(Span::new(
4059                        base_offset + index,
4060                        base_offset + end + 2,
4061                    ))),
4062                    children: parse_inlines_with_context(
4063                        inner,
4064                        base_offset + index + 2,
4065                        options,
4066                        definitions,
4067                        diagnostics,
4068                        context,
4069                    ),
4070                }));
4071                index = end + 2;
4072                text_start = index;
4073                continue;
4074            }
4075        }
4076
4077        if options.constructs.subscript
4078            && starts_exact_byte_run(input, index, b'~', 1)
4079            && !single_tilde_delete_takes_precedence(options, input, index)
4080        {
4081            if let Some(end) = find_simple_inline_close(input, index + 1, b'~') {
4082                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4083                let inner = &input[index + 1..end];
4084                nodes.push(Inline::Subscript(Subscript {
4085                    meta: NodeMeta::new(Some(Span::new(
4086                        base_offset + index,
4087                        base_offset + end + 1,
4088                    ))),
4089                    children: parse_inlines_with_context(
4090                        inner,
4091                        base_offset + index + 1,
4092                        options,
4093                        definitions,
4094                        diagnostics,
4095                        context,
4096                    ),
4097                }));
4098                index = end + 1;
4099                text_start = index;
4100                continue;
4101            }
4102        }
4103
4104        if options.constructs.inline_footnote
4105            && options.constructs.footnote_reference
4106            && bytes.get(index) == Some(&b'^')
4107            && bytes.get(index + 1) == Some(&b'[')
4108        {
4109            if let Some(close) = find_inline_footnote_end(input, index + 2) {
4110                let inner = &input[index + 2..close];
4111                if !inner.trim().is_empty() {
4112                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4113                    nodes.push(Inline::InlineFootnote(InlineFootnote {
4114                        meta: NodeMeta::new(Some(Span::new(
4115                            base_offset + index,
4116                            base_offset + close + 1,
4117                        ))),
4118                        children: parse_inlines_with_context(
4119                            inner,
4120                            base_offset + index + 2,
4121                            options,
4122                            definitions,
4123                            diagnostics,
4124                            context,
4125                        ),
4126                    }));
4127                    index = close + 1;
4128                    text_start = index;
4129                    continue;
4130                }
4131            }
4132        }
4133
4134        if options.constructs.superscript
4135            && bytes.get(index) == Some(&b'^')
4136            && !(options.constructs.inline_footnote && bytes.get(index + 1) == Some(&b'['))
4137        {
4138            if let Some(end) = find_simple_inline_close(input, index + 1, b'^') {
4139                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4140                let inner = &input[index + 1..end];
4141                nodes.push(Inline::Superscript(Superscript {
4142                    meta: NodeMeta::new(Some(Span::new(
4143                        base_offset + index,
4144                        base_offset + end + 1,
4145                    ))),
4146                    children: parse_inlines_with_context(
4147                        inner,
4148                        base_offset + index + 1,
4149                        options,
4150                        definitions,
4151                        diagnostics,
4152                        context,
4153                    ),
4154                }));
4155                index = end + 1;
4156                text_start = index;
4157                continue;
4158            }
4159        }
4160
4161        // GFM strikethrough joins the shared CommonMark delimiter stack: a `~`
4162        // run is recorded as a candidate run (just like `*`/`_`) and paired into
4163        // `Delete` by `process_emphasis`, rather than scanned greedily here. Only
4164        // runs of length 1 (single-tilde mode) or 2 can ever form strikethrough;
4165        // runs of 3+ never do, so they fall through to literal text. The
4166        // subscript branch above already claimed single `~` runs it owns.
4167        if options.constructs.gfm_strikethrough
4168            && bytes[index] == b'~'
4169            && delimiter_byte_run_start(input, index, b'~') == index
4170        {
4171            let run_len = delimiter_byte_run_len(input, index, b'~');
4172            let recordable =
4173                run_len == 2 || (run_len == 1 && options.parse.single_tilde_strikethrough);
4174            if recordable {
4175                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4176                record_emphasis_delimiter(
4177                    &mut nodes,
4178                    &mut delimiters,
4179                    input,
4180                    index,
4181                    base_offset,
4182                    b'~',
4183                    true,
4184                );
4185                index += run_len;
4186                text_start = index;
4187                continue;
4188            }
4189        }
4190
4191        if bytes[index] == b'!' && index + 1 < bytes.len() && bytes[index + 1] == b'[' {
4192            if let Some((end, image)) =
4193                parse_image(input, index, base_offset, options, definitions, diagnostics)
4194            {
4195                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4196                nodes.push(image);
4197                index = end;
4198                text_start = index;
4199                continue;
4200            }
4201        }
4202
4203        if bytes[index] == b'[' {
4204            if let Some((end, wikilink)) = parse_wikilink(input, index, base_offset, options) {
4205                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4206                nodes.push(wikilink);
4207                index = end;
4208                text_start = index;
4209                continue;
4210            }
4211            if let Some((end, link)) = parse_link(
4212                input,
4213                index,
4214                base_offset,
4215                options,
4216                definitions,
4217                diagnostics,
4218                context,
4219            ) {
4220                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4221                nodes.push(link);
4222                index = end;
4223                text_start = index;
4224                continue;
4225            }
4226            if options.constructs.footnote_reference
4227                && bytes.get(index) == Some(&b'[')
4228                && bytes.get(index + 1) == Some(&b'^')
4229            {
4230                if let Some(close) = find_footnote_reference_label_end(input, index + 2) {
4231                    let label = &input[index + 2..close];
4232                    if is_footnote_label(label) {
4233                        flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4234                        nodes.push(Inline::FootnoteReference(FootnoteReference {
4235                            meta: NodeMeta::new(Some(Span::new(
4236                                base_offset + index,
4237                                base_offset + close + 1,
4238                            ))),
4239                            label: label.into(),
4240                            identifier: normalize_label(label),
4241                        }));
4242                        index = close + 1;
4243                        text_start = index;
4244                        continue;
4245                    }
4246                }
4247            }
4248        }
4249
4250        if bytes[index] == b'$' && options.constructs.math_inline {
4251            if let Some((end, value, kind)) = parse_math_inline(input, index) {
4252                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4253                nodes.push(Inline::Math(MathInline {
4254                    meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
4255                    value,
4256                    kind,
4257                }));
4258                index = end;
4259                text_start = index;
4260                continue;
4261            }
4262            // A dollar run that opens but finds no exact-length close is emitted
4263            // as literal text in one piece (like a code-span). Skipping the
4264            // whole run prevents re-opening with a shorter marker inside it, so
4265            // `$$$foo$$` stays literal rather than matching `$$foo$$`. A lone
4266            // `$` before a backtick (the code-math form) is a run of 1, so this
4267            // still advances correctly when that form fails.
4268            let run = bytes[index..]
4269                .iter()
4270                .take_while(|byte| **byte == b'$')
4271                .count();
4272            if run > 1 {
4273                if text.is_empty() {
4274                    text_start = base_offset + index;
4275                }
4276                text.push_str(&input[index..index + run]);
4277                index += run;
4278                continue;
4279            }
4280        }
4281
4282        // GFM bare autolinks must not fire inside an existing link's text
4283        // (no links in links) — `context.allow_links` is false in label scans.
4284        if (options.constructs.gfm_autolink_literal || options.constructs.relaxed_autolinks)
4285            && context.allow_links
4286        {
4287            if let Some((end, destination)) = parse_literal_autolink(
4288                input,
4289                index,
4290                options.constructs.gfm_autolink_literal,
4291                options.constructs.relaxed_autolinks,
4292            ) {
4293                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4294                nodes.push(Inline::Autolink(Autolink {
4295                    meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
4296                    destination,
4297                    kind: AutolinkKind::GfmLiteral {
4298                        original: input[index..end].into(),
4299                    },
4300                }));
4301                index = end;
4302                text_start = index;
4303                continue;
4304            }
4305        }
4306
4307        if bytes[index] == b'<' {
4308            if let Some(end) = parse_autolink_end(input, index) {
4309                let raw = &input[index..end];
4310                if is_autolink(raw) {
4311                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4312                    if context.allow_links {
4313                        nodes.push(Inline::Autolink(Autolink {
4314                            meta: NodeMeta::new(Some(Span::new(
4315                                base_offset + index,
4316                                base_offset + end,
4317                            ))),
4318                            destination: raw[1..raw.len() - 1].into(),
4319                            kind: AutolinkKind::Angle,
4320                        }));
4321                    } else {
4322                        nodes.push(Inline::Text(Text {
4323                            meta: NodeMeta::new(Some(Span::new(
4324                                base_offset + index,
4325                                base_offset + end,
4326                            ))),
4327                            value: raw[1..raw.len() - 1].into(),
4328                        }));
4329                    }
4330                    index = end;
4331                    text_start = index;
4332                    continue;
4333                }
4334            }
4335            if options.constructs.mdx_jsx_inline {
4336                if let Some((end, raw)) = parse_mdx_jsx_inline(input, index) {
4337                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4338                    nodes.push(Inline::MdxJsx(MdxJsxInline {
4339                        meta: NodeMeta::new(Some(Span::new(
4340                            base_offset + index,
4341                            base_offset + end,
4342                        ))),
4343                        value: raw,
4344                    }));
4345                    index = end;
4346                    text_start = index;
4347                    continue;
4348                }
4349            }
4350            if let Some((end, raw)) = parse_html_inline(input, index) {
4351                if options.constructs.html_inline {
4352                    flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4353                    nodes.push(Inline::Html(HtmlInline {
4354                        meta: NodeMeta::new(Some(Span::new(
4355                            base_offset + index,
4356                            base_offset + end,
4357                        ))),
4358                        value: raw,
4359                    }));
4360                    index = end;
4361                    text_start = index;
4362                    continue;
4363                }
4364            }
4365        }
4366
4367        if bytes[index] == b'{' && options.constructs.mdx_expression_inline {
4368            if let Some(end) = find_mdx_expression_inline_close(input, index) {
4369                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4370                nodes.push(Inline::MdxExpression(MdxExpressionInline {
4371                    meta: NodeMeta::new(Some(Span::new(
4372                        base_offset + index,
4373                        base_offset + end + 1,
4374                    ))),
4375                    value: input[index + 1..end].into(),
4376                }));
4377                index = end + 1;
4378                text_start = index;
4379                continue;
4380            } else {
4381                diagnostics.push(Diagnostic::new(
4382                    DiagnosticSeverity::Error,
4383                    DiagnosticCode::InvalidMdx,
4384                    Span::new(base_offset + index, base_offset + input.len()),
4385                    "MDX expression is missing a closing brace",
4386                ));
4387            }
4388        }
4389
4390        if bytes[index] == b':' && options.constructs.shortcode {
4391            if let Some((end, name)) = parse_shortcode(input, index) {
4392                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4393                nodes.push(Inline::Shortcode(Shortcode {
4394                    meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
4395                    name,
4396                }));
4397                index = end;
4398                text_start = index;
4399                continue;
4400            }
4401        }
4402
4403        if bytes[index] == b':' && options.constructs.directive_text {
4404            if let Some((end, directive)) =
4405                parse_text_directive(input, index, base_offset, options, definitions, diagnostics)
4406            {
4407                flush_text(&mut nodes, &mut text, text_start, base_offset + index);
4408                nodes.push(directive);
4409                index = end;
4410                text_start = index;
4411                continue;
4412            }
4413        }
4414
4415        let (next_index, char) = next_char(input, index).expect("valid UTF-8 byte index");
4416        if text.is_empty() {
4417            text_start = base_offset + index;
4418        }
4419        text.push(if char == '\0' { '\u{FFFD}' } else { char });
4420        index = next_index;
4421    }
4422
4423    flush_text(&mut nodes, &mut text, text_start, base_offset + input.len());
4424    process_emphasis(nodes, delimiters)
4425}
4426
4427fn parse_shortcode(input: &str, index: usize) -> Option<(usize, String)> {
4428    if input[index..].starts_with("::") {
4429        return None;
4430    }
4431
4432    let mut cursor = index + 1;
4433    while let Some((next, char)) = next_char(input, cursor) {
4434        if char == ':' {
4435            if cursor == index + 1 {
4436                return None;
4437            }
4438            return Some((next, input[index + 1..cursor].into()));
4439        }
4440        if !(char.is_ascii_alphanumeric() || matches!(char, '_' | '-' | '+')) {
4441            return None;
4442        }
4443        cursor = next;
4444    }
4445    None
4446}
4447
4448fn parse_wikilink(
4449    input: &str,
4450    index: usize,
4451    base_offset: usize,
4452    options: &SyntaxOptions,
4453) -> Option<(usize, Inline)> {
4454    let configured_order = if options.constructs.wikilink_title_after_pipe {
4455        WikiLinkLabelOrder::AfterPipe
4456    } else if options.constructs.wikilink_title_before_pipe {
4457        WikiLinkLabelOrder::BeforePipe
4458    } else {
4459        return None;
4460    };
4461    if input.as_bytes().get(index) != Some(&b'[') || input.as_bytes().get(index + 1) != Some(&b'[')
4462    {
4463        return None;
4464    }
4465
4466    let close = find_wikilink_close(input, index + 2)?;
4467    let source = &input[index + 2..close];
4468    if source.is_empty() || source.len() > WIKILINK_MAX_BYTES {
4469        return None;
4470    }
4471
4472    let (target_source, label_source, label_order) =
4473        if let Some(separator) = find_wikilink_separator(source) {
4474            match configured_order {
4475                WikiLinkLabelOrder::AfterPipe => (
4476                    &source[..separator],
4477                    &source[separator + 1..],
4478                    WikiLinkLabelOrder::AfterPipe,
4479                ),
4480                WikiLinkLabelOrder::BeforePipe => (
4481                    &source[separator + 1..],
4482                    &source[..separator],
4483                    WikiLinkLabelOrder::BeforePipe,
4484                ),
4485            }
4486        } else {
4487            (source, source, configured_order)
4488        };
4489
4490    let target = unescape_string(target_source);
4491    if target.is_empty() {
4492        return None;
4493    }
4494    let label = unescape_string(label_source);
4495    let end = close + 2;
4496    Some((
4497        end,
4498        Inline::WikiLink(WikiLink {
4499            meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + end))),
4500            target,
4501            label,
4502            label_order,
4503        }),
4504    ))
4505}
4506
/// Finds the closing `]]` of a wikilink body that begins at byte offset `start`.
///
/// Returns the byte offset of the first `]` of the closing pair, or `None`
/// when the body is unterminated or reaches a line ending first.
///
/// A backslash hides the character after it, so `\]` cannot take part in the
/// closing pair. A backslash does *not* hide a line ending: a wikilink never
/// spans lines, escaped or not.
///
/// The scan is byte-wise. That is sound for UTF-8 input because every byte
/// that matters here (`\`, `]`, CR, LF) is ASCII, and ASCII bytes never occur
/// inside a multi-byte sequence; the returned offset is therefore always a
/// character boundary.
fn find_wikilink_close(input: &str, start: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    let mut cursor = start;
    while let Some(&byte) = bytes.get(cursor) {
        match byte {
            b'\n' | b'\r' => return None,
            b'\\' => {
                cursor += match bytes.get(cursor + 1) {
                    // Leave a line ending (or the end of input) for the next
                    // iteration so it still terminates the scan.
                    Some(b'\n' | b'\r') | None => 1,
                    // Skip the backslash and the first byte of the escaped
                    // character; any continuation bytes are inert below.
                    Some(_) => 2,
                };
            }
            b']' if bytes.get(cursor + 1) == Some(&b']') => return Some(cursor),
            _ => cursor += 1,
        }
    }
    None
}
4525
4526fn find_wikilink_separator(input: &str) -> Option<usize> {
4527    let bytes = input.as_bytes();
4528    let mut cursor = 0;
4529    while cursor < input.len() {
4530        match bytes[cursor] {
4531            b'\\' => {
4532                cursor += 1;
4533                if cursor < input.len() {
4534                    cursor = next_char(input, cursor)?.0;
4535                }
4536            }
4537            b'|' => return Some(cursor),
4538            _ => cursor = next_char(input, cursor)?.0,
4539        }
4540    }
4541    None
4542}
4543
/// Counts the spaces and tabs at the very end of `input`.
fn trailing_space_count(input: &str) -> usize {
    // Both characters are one byte wide, so the length difference is the count.
    let trimmed = input.trim_end_matches(|c: char| c == ' ' || c == '\t');
    input.len() - trimmed.len()
}
4552
/// Reports whether the last `trailing` bytes of `input` form a hard-line-break
/// suffix: at least two bytes, every one of them a space.
///
/// A tab anywhere in that trailing run disqualifies it (the break is then a
/// soft one). A `trailing` count larger than `input` itself describes a suffix
/// that does not exist, so the answer is `false` rather than a panic.
fn is_hard_break_suffix(input: &str, trailing: usize) -> bool {
    if trailing < 2 {
        return false;
    }
    let bytes = input.as_bytes();
    match bytes.len().checked_sub(trailing) {
        Some(start) => bytes[start..].iter().all(|byte| *byte == b' '),
        None => false,
    }
}
4563
4564fn parse_image(
4565    input: &str,
4566    index: usize,
4567    base_offset: usize,
4568    options: &SyntaxOptions,
4569    definitions: &[String],
4570    diagnostics: &mut Vec<Diagnostic>,
4571) -> Option<(usize, Inline)> {
4572    let label_start = index + 2;
4573    let label_end = find_link_label_end(input, index + 1)?;
4574    let alt_source = &input[label_start..label_end];
4575    let after_label = label_end + 1;
4576    if input.as_bytes().get(after_label) == Some(&b'(') {
4577        let (close, resource) = parse_link_resource(input, after_label)?;
4578        return Some((
4579            close,
4580            Inline::Image(Image {
4581                meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + close))),
4582                destination: resource.destination,
4583                destination_kind: resource.destination_kind,
4584                title: resource.title,
4585                title_kind: resource.title_kind,
4586                alt: parse_inlines(
4587                    alt_source,
4588                    base_offset + label_start,
4589                    options,
4590                    definitions,
4591                    diagnostics,
4592                ),
4593            }),
4594        ));
4595    }
4596    if input.as_bytes().get(after_label) == Some(&b'[') {
4597        let close = find_reference_label_end(input, after_label)?;
4598        let label = &input[after_label + 1..close];
4599        let identifier = if label.is_empty() { alt_source } else { label };
4600        if definition_exists(definitions, identifier) {
4601            return Some((
4602                close + 1,
4603                Inline::ImageReference(ImageReference {
4604                    meta: NodeMeta::new(Some(Span::new(
4605                        base_offset + index,
4606                        base_offset + close + 1,
4607                    ))),
4608                    identifier: normalize_label(identifier),
4609                    label: identifier.into(),
4610                    kind: if label.is_empty() {
4611                        ReferenceKind::Collapsed
4612                    } else {
4613                        ReferenceKind::Full
4614                    },
4615                    alt: parse_inlines(
4616                        alt_source,
4617                        base_offset + label_start,
4618                        options,
4619                        definitions,
4620                        diagnostics,
4621                    ),
4622                }),
4623            ));
4624        }
4625        // A present `[...]` second label that resolves to no definition is not a
4626        // reference and does not fall back to a shortcut (mirrors parse_link).
4627        return None;
4628    }
4629    // Shortcut image reference `![foo]` (no following `(`/`[`) where `foo` is a
4630    // defined label — mirrors parse_link's shortcut branch.
4631    if definition_exists(definitions, alt_source) {
4632        return Some((
4633            after_label,
4634            Inline::ImageReference(ImageReference {
4635                meta: NodeMeta::new(Some(Span::new(
4636                    base_offset + index,
4637                    base_offset + after_label,
4638                ))),
4639                identifier: normalize_label(alt_source),
4640                label: alt_source.into(),
4641                kind: ReferenceKind::Shortcut,
4642                alt: parse_inlines(
4643                    alt_source,
4644                    base_offset + label_start,
4645                    options,
4646                    definitions,
4647                    diagnostics,
4648                ),
4649            }),
4650        ));
4651    }
4652    None
4653}
4654
4655fn parse_link(
4656    input: &str,
4657    index: usize,
4658    base_offset: usize,
4659    options: &SyntaxOptions,
4660    definitions: &[String],
4661    diagnostics: &mut Vec<Diagnostic>,
4662    context: InlineContext,
4663) -> Option<(usize, Inline)> {
4664    if !context.allow_links {
4665        return None;
4666    }
4667    let label_end = find_link_label_end(input, index)?;
4668    let label_source = &input[index + 1..label_end];
4669    if label_contains_link(label_source, base_offset + index + 1, options, definitions) {
4670        return None;
4671    }
4672    let after_label = label_end + 1;
4673    if input.as_bytes().get(after_label) == Some(&b'(') {
4674        // A present-but-invalid `(...)` resource is not an inline link, but
4675        // CommonMark still resolves `[label]` as a shortcut reference and leaves
4676        // the invalid `(...)` as literal text (links 568) — so fall through to
4677        // the reference branches below instead of bailing out of parse_link.
4678        if let Some((close, resource)) = parse_link_resource(input, after_label) {
4679            return Some((
4680                close,
4681                Inline::Link(Link {
4682                    meta: NodeMeta::new(Some(Span::new(base_offset + index, base_offset + close))),
4683                    destination: resource.destination,
4684                    destination_kind: resource.destination_kind,
4685                    title: resource.title,
4686                    title_kind: resource.title_kind,
4687                    children: parse_inlines_with_context(
4688                        label_source,
4689                        base_offset + index + 1,
4690                        options,
4691                        definitions,
4692                        diagnostics,
4693                        InlineContext { allow_links: false },
4694                    ),
4695                }),
4696            ));
4697        }
4698    }
4699    if input.as_bytes().get(after_label) == Some(&b'[') {
4700        let close = find_reference_label_end(input, after_label)?;
4701        let label = &input[after_label + 1..close];
4702        let identifier = if label.is_empty() {
4703            label_source
4704        } else {
4705            label
4706        };
4707        if definition_exists(definitions, identifier) {
4708            return Some((
4709                close + 1,
4710                Inline::LinkReference(LinkReference {
4711                    meta: NodeMeta::new(Some(Span::new(
4712                        base_offset + index,
4713                        base_offset + close + 1,
4714                    ))),
4715                    identifier: normalize_label(identifier),
4716                    label: identifier.into(),
4717                    kind: if label.is_empty() {
4718                        ReferenceKind::Collapsed
4719                    } else {
4720                        ReferenceKind::Full
4721                    },
4722                    children: parse_inlines_with_context(
4723                        label_source,
4724                        base_offset + index + 1,
4725                        options,
4726                        definitions,
4727                        diagnostics,
4728                        InlineContext { allow_links: false },
4729                    ),
4730                }),
4731            ));
4732        }
4733        // A present `[...]` second label that resolves to no definition is NOT a
4734        // link, and CommonMark does not fall back to treating the first label as
4735        // a shortcut (`[x][ ]`, `[x][undef]` stay literal). Only a truly absent
4736        // `[...]` reaches the shortcut path below.
4737        return None;
4738    }
4739    if definition_exists(definitions, label_source) {
4740        return Some((
4741            after_label,
4742            Inline::LinkReference(LinkReference {
4743                meta: NodeMeta::new(Some(Span::new(
4744                    base_offset + index,
4745                    base_offset + after_label,
4746                ))),
4747                identifier: normalize_label(label_source),
4748                label: label_source.into(),
4749                kind: ReferenceKind::Shortcut,
4750                children: parse_inlines_with_context(
4751                    label_source,
4752                    base_offset + index + 1,
4753                    options,
4754                    definitions,
4755                    diagnostics,
4756                    InlineContext { allow_links: false },
4757                ),
4758            }),
4759        ));
4760    }
4761    None
4762}
4763
4764fn find_reference_label_end(input: &str, open: usize) -> Option<usize> {
4765    // A reference/definition link label does not nest: it ends at the first
4766    // unescaped `]`, and an unescaped interior `[` disqualifies it.
4767    if input.as_bytes().get(open) != Some(&b'[') {
4768        return None;
4769    }
4770
4771    let mut cursor = open + 1;
4772    while cursor < input.len() {
4773        let (next, char) = next_char(input, cursor)?;
4774        match char {
4775            '\\' => {
4776                cursor = next_char(input, next)
4777                    .map(|(after_escape, _)| after_escape)
4778                    .unwrap_or(next);
4779                continue;
4780            }
4781            '[' => return None,
4782            ']' => {
4783                return reference_label_is_within_limit(&input[open + 1..cursor]).then_some(cursor);
4784            }
4785            _ => {}
4786        }
4787        cursor = next;
4788    }
4789    None
4790}
4791
4792fn label_contains_link(
4793    label_source: &str,
4794    base_offset: usize,
4795    options: &SyntaxOptions,
4796    definitions: &[String],
4797) -> bool {
4798    let mut diagnostics = Vec::new();
4799    let inlines = parse_inlines_with_context(
4800        label_source,
4801        base_offset,
4802        options,
4803        definitions,
4804        &mut diagnostics,
4805        InlineContext::default(),
4806    );
4807    contains_link_inline(&inlines)
4808}
4809
4810fn contains_link_inline(inlines: &[Inline]) -> bool {
4811    inlines.iter().any(|inline| match inline {
4812        Inline::Link(_) | Inline::LinkReference(_) => true,
4813        Inline::Emphasis(node) => contains_link_inline(&node.children),
4814        Inline::Strong(node) => contains_link_inline(&node.children),
4815        Inline::Delete(node) => contains_link_inline(&node.children),
4816        Inline::TextDirective(node) => contains_link_inline(&node.label),
4817        _ => false,
4818    })
4819}
4820
4821fn find_link_label_end(input: &str, open: usize) -> Option<usize> {
4822    if input.as_bytes().get(open) != Some(&b'[') {
4823        return None;
4824    }
4825
4826    let mut depth = 1usize;
4827    let mut cursor = open + 1;
4828    while cursor < input.len() {
4829        let (next, char) = next_char(input, cursor)?;
4830        match char {
4831            '\\' => {
4832                cursor = next_char(input, next)
4833                    .map(|(after_escape, _)| after_escape)
4834                    .unwrap_or(next);
4835                continue;
4836            }
4837            '`' => {
4838                if let Some((end, _)) = parse_code_span(input, cursor) {
4839                    cursor = end;
4840                    continue;
4841                }
4842            }
4843            '<' => {
4844                if let Some(end) = parse_autolink_end(input, cursor) {
4845                    let raw = &input[cursor..end];
4846                    if is_autolink(raw) {
4847                        cursor = end;
4848                        continue;
4849                    }
4850                }
4851                if let Some((end, _)) = parse_html_inline(input, cursor) {
4852                    cursor = end;
4853                    continue;
4854                }
4855            }
4856            '[' => depth += 1,
4857            ']' => {
4858                depth = depth.checked_sub(1)?;
4859                if depth == 0 {
4860                    return Some(cursor);
4861                }
4862            }
4863            _ => {}
4864        }
4865        cursor = next;
4866    }
4867    None
4868}
4869
4870fn parse_text_directive(
4871    input: &str,
4872    index: usize,
4873    base_offset: usize,
4874    options: &SyntaxOptions,
4875    definitions: &[String],
4876    diagnostics: &mut Vec<Diagnostic>,
4877) -> Option<(usize, Inline)> {
4878    if input[index..].starts_with("::") {
4879        return None;
4880    }
4881    if index > 0 {
4882        let previous = input[..index].chars().next_back()?;
4883        if !previous.is_whitespace() && !matches!(previous, '(' | '[' | '{') {
4884            return None;
4885        }
4886    }
4887    let opener_source = &input[index + 1..];
4888    let (name, label_source, attributes, consumed) = match parse_directive_opener(opener_source) {
4889        Some(opener) => opener,
4890        None => {
4891            if directive_opener_looks_malformed(opener_source) {
4892                diagnostics.push(Diagnostic::new(
4893                    DiagnosticSeverity::Error,
4894                    DiagnosticCode::InvalidDirectiveName,
4895                    Span::new(base_offset + index, base_offset + input.len()),
4896                    "text directive opener is malformed",
4897                ));
4898            }
4899            return None;
4900        }
4901    };
4902    let label = label_source
4903        .map(|source| {
4904            parse_inlines(
4905                source,
4906                base_offset + index + 1 + name.len() + 1,
4907                options,
4908                definitions,
4909                diagnostics,
4910            )
4911        })
4912        .unwrap_or_default();
4913    Some((
4914        index + 1 + consumed,
4915        Inline::TextDirective(TextDirective {
4916            meta: NodeMeta::new(Some(Span::new(
4917                base_offset + index,
4918                base_offset + index + 1 + consumed,
4919            ))),
4920            name,
4921            label,
4922            attributes,
4923        }),
4924    ))
4925}
4926
4927fn parse_directive_opener(
4928    input: &str,
4929) -> Option<(String, Option<&str>, Vec<DirectiveAttribute>, usize)> {
4930    let mut index = 0;
4931    while let Some((next, char)) = next_char(input, index) {
4932        if char.is_ascii_alphanumeric() || char == '_' || char == '-' {
4933            index = next;
4934        } else {
4935            break;
4936        }
4937    }
4938    let name = &input[..index];
4939    if !is_directive_name(name) {
4940        return None;
4941    }
4942
4943    let mut label = None;
4944    let mut attributes = Vec::new();
4945    let mut consumed = index;
4946    if input.as_bytes().get(consumed) == Some(&b'[') {
4947        let close = find_link_label_end(input, consumed)?;
4948        label = Some(&input[consumed + 1..close]);
4949        consumed = close + 1;
4950    }
4951    if input.as_bytes().get(consumed) == Some(&b'{') {
4952        let close = find_directive_attributes_close(input, consumed)?;
4953        attributes = parse_attributes(&input[consumed + 1..close]);
4954        consumed = close + 1;
4955    }
4956
4957    Some((name.into(), label, attributes, consumed))
4958}
4959
4960fn directive_opener_looks_malformed(input: &str) -> bool {
4961    let mut index = 0;
4962    while let Some((next, char)) = next_char(input, index) {
4963        if char.is_ascii_alphanumeric() || char == '_' || char == '-' {
4964            index = next;
4965        } else {
4966            break;
4967        }
4968    }
4969    index > 0
4970        && is_directive_name(&input[..index])
4971        && matches!(input.as_bytes().get(index), Some(b'[' | b'{'))
4972}
4973
/// Finds the `}` closing the attribute block opened by the `{` at byte offset
/// `open`.
///
/// A `}` inside a single- or double-quoted value does not count, and a
/// backslash hides the byte after it both inside and outside quotes. Returns
/// the byte offset of the closing `}`, or `None` when `open` is not a `{` or
/// the block never closes.
fn find_directive_attributes_close(input: &str, open: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    if bytes.get(open) != Some(&b'{') {
        return None;
    }

    let mut open_quote: Option<u8> = None;
    let mut position = open + 1;
    while let Some(&byte) = bytes.get(position) {
        if byte == b'\\' {
            // Skip the backslash together with the byte it escapes.
            position += 2;
            continue;
        }
        match open_quote {
            Some(delimiter) if byte == delimiter => open_quote = None,
            Some(_) => {}
            None => match byte {
                b'"' | b'\'' => open_quote = Some(byte),
                b'}' => return Some(position),
                _ => {}
            },
        }
        position += 1;
    }
    None
}
5011
5012fn parse_attributes(input: &str) -> Vec<DirectiveAttribute> {
5013    let mut attributes = Vec::new();
5014    let mut cursor = 0;
5015    while cursor < input.len() {
5016        cursor = skip_spaces(input, cursor);
5017        if cursor >= input.len() {
5018            break;
5019        }
5020
5021        if input.as_bytes().get(cursor) == Some(&b'#') {
5022            let (id, next) = parse_attribute_token(input, cursor + 1);
5023            if !id.is_empty() {
5024                attributes.push(DirectiveAttribute {
5025                    name: "id".into(),
5026                    value: Some(id.into()),
5027                });
5028            }
5029            cursor = next;
5030            continue;
5031        }
5032
5033        if input.as_bytes().get(cursor) == Some(&b'.') {
5034            let (class, next) = parse_attribute_token(input, cursor + 1);
5035            if !class.is_empty() {
5036                attributes.push(DirectiveAttribute {
5037                    name: "class".into(),
5038                    value: Some(class.into()),
5039                });
5040            }
5041            cursor = next;
5042            continue;
5043        }
5044
5045        let (name, next) = parse_attribute_name(input, cursor);
5046        if name.is_empty() {
5047            break;
5048        }
5049        cursor = skip_spaces(input, next);
5050        if input.as_bytes().get(cursor) == Some(&b'=') {
5051            cursor = skip_spaces(input, cursor + 1);
5052            if let Some((value, next)) = parse_attribute_value(input, cursor) {
5053                attributes.push(DirectiveAttribute {
5054                    name: name.into(),
5055                    value: Some(value),
5056                });
5057                cursor = next;
5058            } else {
5059                attributes.push(DirectiveAttribute {
5060                    name: name.into(),
5061                    value: Some(String::new()),
5062                });
5063            }
5064        } else {
5065            attributes.push(DirectiveAttribute {
5066                name: name.into(),
5067                value: None,
5068            });
5069        }
5070    }
5071    attributes
5072}
5073
5074fn parse_attribute_token(input: &str, index: usize) -> (&str, usize) {
5075    let mut cursor = index;
5076    while let Some((next, char)) = next_char(input, cursor) {
5077        if char.is_whitespace() {
5078            break;
5079        }
5080        cursor = next;
5081    }
5082    (&input[index..cursor], cursor)
5083}
5084
5085fn parse_attribute_name(input: &str, index: usize) -> (&str, usize) {
5086    let mut cursor = index;
5087    while let Some((next, char)) = next_char(input, cursor) {
5088        if char.is_whitespace() || char == '=' {
5089            break;
5090        }
5091        cursor = next;
5092    }
5093    (&input[index..cursor], cursor)
5094}
5095
5096fn parse_attribute_value(input: &str, index: usize) -> Option<(String, usize)> {
5097    let quote = input.as_bytes().get(index).copied();
5098    if matches!(quote, Some(b'"' | b'\'')) {
5099        let quote = quote?;
5100        let mut cursor = index + 1;
5101        while cursor < input.len() {
5102            let (next, char) = next_char(input, cursor)?;
5103            if char as u8 == quote && !is_escaped_at(input, cursor) {
5104                return Some((unescape_ascii_punctuation(&input[index + 1..cursor]), next));
5105            }
5106            cursor = next;
5107        }
5108        return None;
5109    }
5110
5111    let (value, next) = parse_attribute_token(input, index);
5112    Some((
5113        unescape_selected(value, |char| matches!(char, '\\' | '&')),
5114        next,
5115    ))
5116}
5117
/// The pieces of a parsed inline code span, as produced by `parse_code_span`.
struct CodeSpanSource {
    /// Content after code-span normalization.
    value: String,
    /// Content exactly as it appeared between the fences.
    raw: String,
    /// Number of backticks in the opening (and closing) fence.
    fence_length: usize,
}
5123
5124fn parse_code_span(input: &str, index: usize) -> Option<(usize, CodeSpanSource)> {
5125    let len = input[index..]
5126        .as_bytes()
5127        .iter()
5128        .take_while(|byte| **byte == b'`')
5129        .count();
5130    let search_start = index + len;
5131    let close = find_code_span_close(input, search_start, len)?;
5132    let raw = &input[search_start..close];
5133    Some((
5134        close + len,
5135        CodeSpanSource {
5136            value: normalize_code_span(raw),
5137            raw: raw.into(),
5138            fence_length: len,
5139        },
5140    ))
5141}
5142
5143fn find_code_span_close(input: &str, start: usize, marker_len: usize) -> Option<usize> {
5144    let bytes = input.as_bytes();
5145    let mut cursor = start;
5146    while cursor < bytes.len() {
5147        if bytes[cursor] != b'`' {
5148            cursor = next_char(input, cursor)
5149                .map(|(next, _)| next)
5150                .unwrap_or(bytes.len());
5151            continue;
5152        }
5153        let run_len = bytes[cursor..]
5154            .iter()
5155            .take_while(|byte| **byte == b'`')
5156            .count();
5157        if run_len == marker_len {
5158            return Some(cursor);
5159        }
5160        cursor += run_len;
5161    }
5162    None
5163}
5164
5165fn normalize_code_span(input: &str) -> String {
5166    let mut normalized = String::new();
5167    let mut cursor = 0;
5168    while cursor < input.len() {
5169        let (next, char) = next_char(input, cursor).expect("valid UTF-8 byte index");
5170        if char == '\r' {
5171            if input.as_bytes().get(next) == Some(&b'\n') {
5172                cursor = next + 1;
5173            } else {
5174                cursor = next;
5175            }
5176            normalized.push(' ');
5177            continue;
5178        }
5179        if char == '\n' {
5180            normalized.push(' ');
5181            cursor = next;
5182            continue;
5183        }
5184        normalized.push(char);
5185        cursor = next;
5186    }
5187
5188    if normalized.starts_with(' ')
5189        && normalized.ends_with(' ')
5190        && normalized.chars().any(|char| char != ' ')
5191    {
5192        normalized[1..normalized.len() - 1].into()
5193    } else {
5194        normalized
5195    }
5196}
5197
5198fn can_open_delimited(input: &str, index: usize, marker_len: usize) -> bool {
5199    delimiter_flanking(input, index, marker_len).left
5200}
5201
5202fn can_close_delimited(input: &str, index: usize, marker_len: usize) -> bool {
5203    delimiter_flanking(input, index, marker_len).right
5204}
5205
5206fn find_closing_delimiter(
5207    input: &str,
5208    start: usize,
5209    marker: &str,
5210    underscore: bool,
5211) -> Option<usize> {
5212    let marker_len = marker.len();
5213    let mut cursor = start;
5214    let mut nested = 0usize;
5215    while cursor <= input.len() {
5216        let candidate = input[cursor..].find(marker).map(|offset| cursor + offset)?;
5217        if is_escaped_at(input, candidate) {
5218            cursor = candidate + marker_len;
5219            continue;
5220        }
5221        if delimiter_candidate_precedes_link_close(input, start, candidate, marker_len) {
5222            cursor = candidate + marker_len;
5223            continue;
5224        }
5225        if marker_len == 1 && nested == 0 && starts_longer_delimiter_run(input, candidate, marker) {
5226            cursor = candidate + delimiter_run_len(input, candidate, marker);
5227            continue;
5228        }
5229
5230        let can_open = if underscore {
5231            can_open_underscore(input, candidate, marker_len)
5232        } else {
5233            can_open_delimited(input, candidate, marker_len)
5234        };
5235        let can_close = if underscore {
5236            can_close_underscore(input, candidate, marker_len)
5237        } else {
5238            can_close_delimited(input, candidate, marker_len)
5239        };
5240
5241        if can_close {
5242            if nested == 0 {
5243                return Some(candidate);
5244            }
5245            nested -= 1;
5246            cursor = candidate + marker_len;
5247            continue;
5248        }
5249        if can_open {
5250            nested += 1;
5251        }
5252        cursor = candidate + marker_len;
5253    }
5254    None
5255}
5256
5257fn find_single_tilde_delete_close(input: &str, start: usize) -> Option<usize> {
5258    let mut cursor = start;
5259    while cursor < input.len() {
5260        let Some(candidate) = input[cursor..].find('~').map(|index| cursor + index) else {
5261            break;
5262        };
5263        if !is_escaped_at(input, candidate) && single_tilde_can_close_delete(input, candidate) {
5264            return Some(candidate);
5265        }
5266        cursor = candidate + 1;
5267    }
5268    None
5269}
5270
5271fn single_tilde_can_open_delete(input: &str, index: usize) -> bool {
5272    starts_exact_byte_run(input, index, b'~', 1)
5273        && can_open_delimited(input, index, 1)
5274        && !tilde_is_alphanumeric_interior(input, index)
5275}
5276
5277fn single_tilde_can_close_delete(input: &str, index: usize) -> bool {
5278    starts_exact_byte_run(input, index, b'~', 1)
5279        && can_close_delimited(input, index, 1)
5280        && !tilde_is_alphanumeric_interior(input, index)
5281}
5282
5283fn single_tilde_delete_takes_precedence(
5284    options: &SyntaxOptions,
5285    input: &str,
5286    index: usize,
5287) -> bool {
5288    options.constructs.gfm_strikethrough
5289        && options.parse.single_tilde_strikethrough
5290        && single_tilde_can_open_delete(input, index)
5291        && find_single_tilde_delete_close(input, index + 1).is_some()
5292}
5293
/// True when the one-byte marker at `index` has an alphanumeric character
/// (Unicode-aware) directly before it and directly after it.
fn tilde_is_alphanumeric_interior(input: &str, index: usize) -> bool {
    let (before, after) = (&input[..index], &input[index + 1..]);
    matches!(before.chars().next_back(), Some(ch) if ch.is_alphanumeric())
        && matches!(after.chars().next(), Some(ch) if ch.is_alphanumeric())
}
5300
5301fn starts_exact_byte_run(input: &str, index: usize, marker: u8, len: usize) -> bool {
5302    input.as_bytes().get(index) == Some(&marker)
5303        && delimiter_byte_run_start(input, index, marker) == index
5304        && delimiter_byte_run_len(input, index, marker) == len
5305}
5306
5307fn delimiter_byte_run_start(input: &str, index: usize, marker: u8) -> usize {
5308    let bytes = input.as_bytes();
5309    let mut start = index;
5310    while start > 0 && bytes[start - 1] == marker && !is_escaped_at(input, start - 1) {
5311        start -= 1;
5312    }
5313    start
5314}
5315
/// Counts the consecutive `marker` bytes starting at byte offset `index`
/// (zero when `index` is past the end or the byte there differs).
fn delimiter_byte_run_len(input: &str, index: usize, marker: u8) -> usize {
    input
        .as_bytes()
        .iter()
        .skip(index)
        .take_while(|&&byte| byte == marker)
        .count()
}
5324
5325fn find_simple_inline_close(input: &str, start: usize, marker: u8) -> Option<usize> {
5326    let bytes = input.as_bytes();
5327    let mut cursor = start;
5328    while cursor < input.len() {
5329        match bytes[cursor] {
5330            b'\\' => {
5331                cursor += 1;
5332                if cursor < input.len() {
5333                    cursor = next_char(input, cursor)?.0;
5334                }
5335            }
5336            b'\n' | b'\r' => return None,
5337            byte if byte == marker => return (cursor > start).then_some(cursor),
5338            _ => cursor = next_char(input, cursor)?.0,
5339        }
5340    }
5341    None
5342}
5343
5344fn find_spoiler_close(input: &str, start: usize) -> Option<usize> {
5345    let bytes = input.as_bytes();
5346    let mut cursor = start;
5347    while cursor + 1 < input.len() {
5348        match bytes[cursor] {
5349            b'\\' => {
5350                cursor += 1;
5351                if cursor < input.len() {
5352                    cursor = next_char(input, cursor)?.0;
5353                }
5354            }
5355            b'\n' | b'\r' => return None,
5356            b'|' if bytes.get(cursor + 1) == Some(&b'|')
5357                && cursor > start
5358                && bytes.get(cursor.wrapping_sub(1)) != Some(&b'|') =>
5359            {
5360                return Some(cursor);
5361            }
5362            _ => cursor = next_char(input, cursor)?.0,
5363        }
5364    }
5365    None
5366}
5367
/// True when `index` is the very start of a run of at least two consecutive
/// `marker`s: `marker` occurs at `index`, does not occur immediately before
/// it, and occurs again immediately after it.
fn starts_longer_delimiter_run(input: &str, index: usize, marker: &str) -> bool {
    let Some(after_first) = input[index..].strip_prefix(marker) else {
        return false;
    };
    !input[..index].ends_with(marker) && after_first.starts_with(marker)
}
5373
/// Returns the byte length of the run of back-to-back `marker` occurrences
/// that starts at byte offset `index` (zero if `marker` does not occur there).
fn delimiter_run_len(input: &str, index: usize, marker: &str) -> usize {
    let mut rest = &input[index..];
    let mut length = 0;
    while let Some(tail) = rest.strip_prefix(marker) {
        length += marker.len();
        rest = tail;
    }
    length
}
5381
5382fn delimiter_candidate_precedes_link_close(
5383    input: &str,
5384    start: usize,
5385    candidate: usize,
5386    marker_len: usize,
5387) -> bool {
5388    let bytes = input.as_bytes();
5389    if bytes.get(candidate + marker_len) != Some(&b']') {
5390        return false;
5391    }
5392    if !matches!(bytes.get(candidate + marker_len + 1), Some(b'(' | b'[')) {
5393        return false;
5394    }
5395
5396    let mut depth = 0usize;
5397    let mut cursor = start;
5398    while cursor < candidate {
5399        let Some((next, char)) = next_char(input, cursor) else {
5400            break;
5401        };
5402        match char {
5403            '\\' => {
5404                cursor = next_char(input, next)
5405                    .map(|(after_escape, _)| after_escape)
5406                    .unwrap_or(next);
5407                continue;
5408            }
5409            '`' => {
5410                if let Some((end, _)) = parse_code_span(input, cursor) {
5411                    cursor = end;
5412                    continue;
5413                }
5414            }
5415            '[' => depth += 1,
5416            ']' => depth = depth.saturating_sub(1),
5417            _ => {}
5418        }
5419        cursor = next;
5420    }
5421    depth > 0
5422}
5423
5424fn can_open_underscore(input: &str, index: usize, marker_len: usize) -> bool {
5425    let flanking = delimiter_flanking(input, index, marker_len);
5426    flanking.left
5427        && (!flanking.right || flanking.previous.is_some_and(|c| c.is_ascii_punctuation()))
5428}
5429
5430fn can_close_underscore(input: &str, index: usize, marker_len: usize) -> bool {
5431    let flanking = delimiter_flanking(input, index, marker_len);
5432    flanking.right && (!flanking.left || flanking.next.is_some_and(|c| c.is_ascii_punctuation()))
5433}
5434
5435#[derive(Clone, Copy)]
5436struct DelimiterFlanking {
5437    left: bool,
5438    right: bool,
5439    previous: Option<char>,
5440    next: Option<char>,
5441}
5442
5443fn delimiter_flanking(input: &str, index: usize, marker_len: usize) -> DelimiterFlanking {
5444    let previous = input[..index].chars().next_back();
5445    let next = input[index + marker_len..].chars().next();
5446
5447    let previous_whitespace = previous.is_none_or(char::is_whitespace);
5448    let next_whitespace = next.is_none_or(char::is_whitespace);
5449    let previous_punctuation = previous.is_some_and(is_flanking_punctuation);
5450    let next_punctuation = next.is_some_and(is_flanking_punctuation);
5451
5452    let left = next.is_some()
5453        && !next_whitespace
5454        && !(next_punctuation && !previous_whitespace && !previous_punctuation);
5455    let right = previous.is_some()
5456        && !previous_whitespace
5457        && !(previous_punctuation && !next_whitespace && !next_punctuation);
5458
5459    DelimiterFlanking {
5460        left,
5461        right,
5462        previous,
5463        next,
5464    }
5465}
5466
5467/// Dollar-fenced inline math, GitHub Flavored Markdown dialect.
5468///
5469/// A `$` is a flanking delimiter resolved at scan time (math is not pushed onto
5470/// the emphasis delimiter stack). An opening run of one or two `$` (runs of
5471/// three or more never form math) scans forward for a matching closing run:
5472///
5473/// * single `$`: cannot open if the next char is ASCII whitespace; the closing
5474///   `$` cannot be preceded by ASCII whitespace nor followed by an ASCII digit;
5475///   a `\$` inside is skipped (the backslash is kept verbatim, never a
5476///   delimiter); the close must be a run of exactly one `$`.
5477/// * double `$$`: no flanking and no digit guard; closes on the next run of two
5478///   `$`; content is kept verbatim and may span newlines (this is still an
5479///   inline display span — `$$` flow blocks are handled by `parse_math_block`).
5480///
5481/// The closing run is matched greedily (the nearest valid close wins), which is
5482/// equivalent to emphasis-style "nearest preceding open" because a failed open
5483/// emits a literal `$`/`$$` and the scan resumes after it. Content for the
5484/// single-`$` form is normalized like a code span (line endings → spaces, one
5485/// edge-space strip); the `$$` display form is verbatim. The `` $`…`$ `` code
5486/// form takes precedence.
5487fn parse_math_inline(input: &str, index: usize) -> Option<(usize, String, MathInlineKind)> {
5488    if let Some((end, value)) = parse_math_code_inline(input, index) {
5489        return Some((end, value, MathInlineKind::Code));
5490    }
5491
5492    let bytes = input.as_bytes();
5493    let open_dollars = bytes[index..]
5494        .iter()
5495        .take_while(|byte| **byte == b'$')
5496        .count();
5497    // The maximum math fence length is 2 dollars: a run of three or more never
5498    // opens math.
5499    if open_dollars == 0 || open_dollars > 2 {
5500        return None;
5501    }
5502
5503    let content_start = index + open_dollars;
5504    let close = scan_to_closing_dollar(input, content_start, open_dollars)?;
5505    let content_end = close - open_dollars;
5506    // The span requires `endpos - startpos >= fence_length * 2 + 1`, i.e. at
5507    // least one content byte between the open and close fences.
5508    if content_end <= content_start {
5509        return None;
5510    }
5511
5512    let raw = &input[content_start..content_end];
5513    let value = if open_dollars == 1 {
5514        normalize_math_text(raw)
5515    } else {
5516        raw.into()
5517    };
5518    let dollars = u8::try_from(open_dollars).unwrap_or(u8::MAX);
5519    Some((close, value, MathInlineKind::Dollar { dollars }))
5520}
5521
5522/// Scans for the closing dollar run. `start` is the first content byte
5523/// (just past the opening run); returns the byte offset just past a matching
5524/// closing run of exactly `open_dollars` `$`.
5525fn scan_to_closing_dollar(input: &str, start: usize, open_dollars: usize) -> Option<usize> {
5526    let bytes = input.as_bytes();
5527    // A space immediately after a single opening `$` forbids the open.
5528    if open_dollars == 1 && bytes.get(start).is_some_and(|byte| is_math_space(*byte)) {
5529        return None;
5530    }
5531
5532    let mut cursor = start;
5533    loop {
5534        while cursor < bytes.len() && bytes[cursor] != b'$' {
5535            cursor += 1;
5536        }
5537        if cursor >= bytes.len() {
5538            return None;
5539        }
5540        // `cursor` now points at the first `$` of a potential closing run; the
5541        // char just before it gates the single-`$` flanking and escape rules.
5542        let prev = bytes[cursor - 1];
5543        if open_dollars == 1 && is_math_space(prev) {
5544            return None;
5545        }
5546        if open_dollars == 1 && prev == b'\\' {
5547            // An escaped `\$` is content, not a delimiter: skip this one `$` and
5548            // keep scanning (the backslash stays in the content verbatim).
5549            cursor += 1;
5550            continue;
5551        }
5552        let run = bytes[cursor..]
5553            .iter()
5554            .take(open_dollars)
5555            .take_while(|byte| **byte == b'$')
5556            .count();
5557        // The single-`$` close cannot be followed by an ASCII digit.
5558        if open_dollars == 1 && bytes.get(cursor + run).is_some_and(u8::is_ascii_digit) {
5559            return None;
5560        }
5561        if run == open_dollars {
5562            return Some(cursor + run);
5563        }
5564        cursor += run;
5565    }
5566}
5567
/// Whitespace as far as dollar math is concerned: tab, line feed, carriage
/// return, or space (ASCII only).
fn is_math_space(byte: u8) -> bool {
    [b'\t', b'\n', b'\r', b' '].contains(&byte)
}
5572
5573/// Applies the code-span content rules to dollar-fenced math: line endings
5574/// become single spaces, then if the content begins AND ends with U+0020 and is
5575/// not entirely spaces, one space is stripped from each edge.
5576fn normalize_math_text(input: &str) -> String {
5577    let mut normalized = String::new();
5578    let mut cursor = 0;
5579    while cursor < input.len() {
5580        let (next, char) = next_char(input, cursor).expect("valid UTF-8 byte index");
5581        if char == '\r' {
5582            if input.as_bytes().get(next) == Some(&b'\n') {
5583                cursor = next + 1;
5584            } else {
5585                cursor = next;
5586            }
5587            normalized.push(' ');
5588            continue;
5589        }
5590        if char == '\n' {
5591            normalized.push(' ');
5592            cursor = next;
5593            continue;
5594        }
5595        normalized.push(char);
5596        cursor = next;
5597    }
5598
5599    if normalized.starts_with(' ')
5600        && normalized.ends_with(' ')
5601        && normalized.chars().any(|char| char != ' ')
5602    {
5603        normalized[1..normalized.len() - 1].into()
5604    } else {
5605        normalized
5606    }
5607}
5608
/// Parses the `` $`…`$ `` inline math form starting at byte offset `index`.
///
/// The content runs from just after ``$` `` to the first following `` `$ ``
/// and must be non-empty. Returns the offset just past the closing `` `$ ``
/// and the content verbatim.
fn parse_math_code_inline(input: &str, index: usize) -> Option<(usize, String)> {
    let body = input[index..].strip_prefix("$`")?;
    let body_start = index + 2;
    match body.find("`$") {
        Some(length) if length > 0 => Some((body_start + length + 2, body[..length].into())),
        _ => None,
    }
}
5624
5625fn parse_link_resource(input: &str, open: usize) -> Option<(usize, ParsedLinkResource)> {
5626    let bytes = input.as_bytes();
5627    if bytes.get(open) != Some(&b'(') {
5628        return None;
5629    }
5630    let (mut cursor, initial_space) = skip_link_resource_space_with_info(input, open + 1)?;
5631    if bytes.get(cursor) == Some(&b')') {
5632        return Some((
5633            cursor + 1,
5634            ParsedLinkResource {
5635                destination: String::new(),
5636                destination_kind: LinkDestinationKind::Omitted,
5637                title: None,
5638                title_kind: None,
5639            },
5640        ));
5641    }
5642    if initial_space && matches!(bytes.get(cursor), Some(b'"' | b'\'' | b'(')) {
5643        let (title, title_kind, next) = parse_link_title(input, cursor)?;
5644        cursor = skip_link_resource_space(input, next)?;
5645        if bytes.get(cursor) == Some(&b')') {
5646            return Some((
5647                cursor + 1,
5648                ParsedLinkResource {
5649                    destination: String::new(),
5650                    destination_kind: LinkDestinationKind::Omitted,
5651                    title: Some(title),
5652                    title_kind: Some(title_kind),
5653                },
5654            ));
5655        }
5656        return None;
5657    }
5658    let (destination, destination_kind, next) = parse_link_destination(input, cursor)?;
5659    let (after_destination, had_space) = skip_link_resource_space_with_info(input, next)?;
5660    cursor = after_destination;
5661    if bytes.get(cursor) == Some(&b')') {
5662        return Some((
5663            cursor + 1,
5664            ParsedLinkResource {
5665                destination,
5666                destination_kind,
5667                title: None,
5668                title_kind: None,
5669            },
5670        ));
5671    }
5672    if !had_space {
5673        return None;
5674    }
5675
5676    let (title, title_kind, next) = parse_link_title(input, cursor)?;
5677    cursor = skip_link_resource_space(input, next)?;
5678    if bytes.get(cursor) == Some(&b')') {
5679        Some((
5680            cursor + 1,
5681            ParsedLinkResource {
5682                destination,
5683                destination_kind,
5684                title: Some(title),
5685                title_kind: Some(title_kind),
5686            },
5687        ))
5688    } else {
5689        None
5690    }
5691}
5692
5693fn parse_link_destination(
5694    input: &str,
5695    index: usize,
5696) -> Option<(String, LinkDestinationKind, usize)> {
5697    if input.as_bytes().get(index) == Some(&b'<') {
5698        let mut cursor = index + 1;
5699        while cursor < input.len() {
5700            let (next, char) = next_char(input, cursor)?;
5701            if char == '>' && !is_escaped_at(input, cursor) {
5702                return Some((
5703                    unescape_ascii_punctuation(&input[index + 1..cursor]),
5704                    LinkDestinationKind::Angle,
5705                    next,
5706                ));
5707            }
5708            if (char == '<' && !is_escaped_at(input, cursor)) || char == '\n' || char == '\r' {
5709                return None;
5710            }
5711            cursor = next;
5712        }
5713        return None;
5714    }
5715
5716    let mut cursor = index;
5717    let mut depth = 0usize;
5718    while cursor < input.len() {
5719        let (next, char) = next_char(input, cursor)?;
5720        // A bare destination terminates on ASCII space or an ASCII control
5721        // character; Unicode whitespace (e.g. U+00A0) is ordinary. A backslash
5722        // before a space is NOT an escape (only ASCII punctuation is escapable),
5723        // so `\ ` still terminates the destination → `[a](\ b)` is not a link.
5724        if (char == ' ' || char.is_ascii_control()) && depth == 0 {
5725            break;
5726        }
5727        if char == '(' && !is_escaped_at(input, cursor) {
5728            depth += 1;
5729            // CommonMark caps balanced parens in a bare destination at depth 32.
5730            if depth > 32 {
5731                return None;
5732            }
5733        } else if char == ')' && !is_escaped_at(input, cursor) {
5734            if depth == 0 {
5735                break;
5736            }
5737            depth -= 1;
5738        }
5739        cursor = next;
5740    }
5741
5742    if cursor == index || depth > 0 {
5743        None
5744    } else {
5745        Some((
5746            unescape_ascii_punctuation(&input[index..cursor]),
5747            LinkDestinationKind::Bare,
5748            cursor,
5749        ))
5750    }
5751}
5752
5753fn parse_link_title(input: &str, index: usize) -> Option<(String, LinkTitleKind, usize)> {
5754    let opener = input.as_bytes().get(index).copied()?;
5755    let (closer, title_kind) = match opener {
5756        b'"' => ('"', LinkTitleKind::DoubleQuote),
5757        b'\'' => ('\'', LinkTitleKind::SingleQuote),
5758        b'(' => (')', LinkTitleKind::Paren),
5759        _ => return None,
5760    };
5761    let mut cursor = index + 1;
5762    while cursor < input.len() {
5763        let (next, char) = next_char(input, cursor)?;
5764        if char == closer && !is_escaped_at(input, cursor) {
5765            if contains_blank_line(&input[index + 1..cursor]) {
5766                return None;
5767            }
5768            return Some((
5769                unescape_ascii_punctuation(&input[index + 1..cursor]),
5770                title_kind,
5771                next,
5772            ));
5773        }
5774        if opener == b'(' && char == '(' && !is_escaped_at(input, cursor) {
5775            return None;
5776        }
5777        cursor = next;
5778    }
5779    None
5780}
5781
5782fn contains_blank_line(input: &str) -> bool {
5783    if !input.bytes().any(|byte| matches!(byte, b'\n' | b'\r')) {
5784        return false;
5785    }
5786    // A title that merely begins or ends with an EOL is allowed; only an INTERIOR
5787    // blank line (a blank line bounded by content on both sides) is rejected. The
5788    // empty first/last line entries that a leading/trailing newline produces are
5789    // boundary artifacts, not blank lines in the title content.
5790    let lines = collect_lines(input, 0);
5791    let interior = lines.len().saturating_sub(1);
5792    lines
5793        .iter()
5794        .take(interior)
5795        .skip(1)
5796        .any(|line| line.text.trim().is_empty())
5797}
5798
5799fn skip_link_resource_space(input: &str, index: usize) -> Option<usize> {
5800    skip_link_resource_space_with_info(input, index).map(|(index, _)| index)
5801}
5802
/// Skips spaces, tabs and at most one line ending starting at byte offset
/// `index`.
///
/// `\r\n` counts as a single line ending. A second line ending makes the
/// whole skip fail with `None`. On success, returns the offset of the first
/// non-whitespace byte and whether anything was skipped at all.
fn skip_link_resource_space_with_info(input: &str, mut index: usize) -> Option<(usize, bool)> {
    let bytes = input.as_bytes();
    let start = index;
    let mut seen_line_break = false;
    loop {
        let width = match bytes.get(index) {
            Some(b' ' | b'\t') => 1,
            Some(&ending @ (b'\n' | b'\r')) => {
                if seen_line_break {
                    return None;
                }
                seen_line_break = true;
                // Consume CRLF as one unit.
                if ending == b'\r' && bytes.get(index + 1) == Some(&b'\n') {
                    2
                } else {
                    1
                }
            }
            _ => break,
        };
        index += width;
    }
    // Every iteration advances, so any movement means whitespace was seen.
    Some((index, index > start))
}
5836
5837pub(crate) fn parse_character_reference(input: &str, index: usize) -> Option<(usize, String)> {
5838    let rest = input.get(index..)?;
5839    if let Some(rest) = rest
5840        .strip_prefix("&#x")
5841        .or_else(|| rest.strip_prefix("&#X"))
5842    {
5843        let digits = rest.find(';')?;
5844        if digits == 0 || digits > 6 || !rest[..digits].bytes().all(|byte| byte.is_ascii_hexdigit())
5845        {
5846            return None;
5847        }
5848        let value = u32::from_str_radix(&rest[..digits], 16).ok()?;
5849        return Some((
5850            index + 3 + digits + 1,
5851            character_reference_value(value).into(),
5852        ));
5853    }
5854    if let Some(rest) = rest.strip_prefix("&#") {
5855        let digits = rest.find(';')?;
5856        if digits == 0 || digits > 7 || !rest[..digits].bytes().all(|byte| byte.is_ascii_digit()) {
5857            return None;
5858        }
5859        let value = rest[..digits].parse::<u32>().ok()?;
5860        return Some((
5861            index + 2 + digits + 1,
5862            character_reference_value(value).into(),
5863        ));
5864    }
5865
5866    let name_end = rest.find(';')?;
5867    if name_end == 0 || name_end > 32 {
5868        return None;
5869    }
5870    let name = &rest[1..name_end];
5871    named_character_reference(name).map(|value| (index + name_end + 1, value.into()))
5872}
5873
/// Maps the codepoint of a numeric character reference to the character it
/// stands for.
///
/// `U+0000`, UTF-16 surrogates, and values beyond the Unicode scalar range
/// all become `U+FFFD`; every other codepoint maps to itself. This mirrors the
/// CommonMark reference behavior.
///
/// Two things are intentionally NOT done:
/// - No HTML5 Windows-1252 remapping of C1 bytes: `&#128;` yields `U+0080`,
///   not the Euro sign, matching the CommonMark reference.
/// - No replacement of C0/C1 controls, DEL, or Unicode noncharacters, unlike
///   some HTML-oriented decoders. Leaving them as their literal scalar lets
///   the serializer's `&#xNN;` escaping of control characters survive a
///   re-parse. The roundtrip corpus only pins `{0 -> FFFD, 9 -> tab,
///   10 -> line feed, surrogate -> FFFD, out-of-range -> FFFD}`, and all of
///   those hold here.
pub(crate) fn character_reference_value(value: u32) -> char {
    const REPLACEMENT: char = '\u{FFFD}';
    match value {
        0 => REPLACEMENT,
        codepoint => char::from_u32(codepoint).unwrap_or(REPLACEMENT),
    }
}
5897
/// Reports whether the byte at `index` is backslash-escaped, i.e. whether an
/// odd number of consecutive `\` bytes sits immediately before it.
///
/// An even run means the backslashes escape each other and the byte at
/// `index` is left unescaped.
pub(crate) fn is_escaped_at(input: &str, index: usize) -> bool {
    let preceding_backslashes = input.as_bytes()[..index]
        .iter()
        .rev()
        .take_while(|byte| **byte == b'\\')
        .count();
    preceding_backslashes % 2 == 1
}
5908
5909fn parse_definition_destination_title(input: &str) -> Option<ParsedLinkResource> {
5910    let (mut cursor, _) = skip_link_resource_space_with_info(input, 0)?;
5911    let (destination, destination_kind, next) = parse_link_destination(input, cursor)?;
5912    cursor = next;
5913
5914    let (next, had_space) = skip_link_resource_space_with_info(input, cursor)?;
5915    cursor = next;
5916    if cursor >= input.len() {
5917        return Some(ParsedLinkResource {
5918            destination,
5919            destination_kind,
5920            title: None,
5921            title_kind: None,
5922        });
5923    }
5924    if !had_space {
5925        return None;
5926    }
5927
5928    let (title, title_kind, next) = parse_link_title(input, cursor)?;
5929    let after_title = skip_link_resource_space(input, next)?;
5930    (after_title == input.len()).then_some(ParsedLinkResource {
5931        destination,
5932        destination_kind,
5933        title: Some(title),
5934        title_kind: Some(title_kind),
5935    })
5936}
5937
/// Whether the first non-whitespace character of `input` is one of the three
/// title openers: `"`, `'` or `(`.
fn line_can_start_definition_title(input: &str) -> bool {
    input
        .trim_start()
        .starts_with(|opener: char| matches!(opener, '"' | '\'' | '('))
}
5942
5943fn unescape_ascii_punctuation(input: &str) -> String {
5944    // Only ASCII punctuation is escapable (`\ ` keeps its backslash).
5945    unescape_selected(input, |char| char.is_ascii_punctuation())
5946}
5947
5948fn unescape_string(input: &str) -> String {
5949    unescape_selected(input, |char| char.is_ascii_punctuation() || char == '&')
5950}
5951
5952fn unescape_selected(input: &str, should_unescape: impl Fn(char) -> bool) -> String {
5953    let mut output = String::new();
5954    let mut cursor = 0;
5955    while cursor < input.len() {
5956        if input.as_bytes().get(cursor) == Some(&b'&') {
5957            if let Some((end, value)) = parse_character_reference(input, cursor) {
5958                output.push_str(&value);
5959                cursor = end;
5960                continue;
5961            }
5962        }
5963        let (next, char) = next_char(input, cursor).expect("valid UTF-8 byte index");
5964        if char == '\\' {
5965            if let Some((after_escape, escaped)) = next_char(input, next) {
5966                if should_unescape(escaped) {
5967                    output.push(escaped);
5968                } else {
5969                    output.push(char);
5970                    output.push(escaped);
5971                }
5972                cursor = after_escape;
5973            } else {
5974                output.push(char);
5975                cursor = next;
5976            }
5977        } else {
5978            output.push(if char == '\0' { '\u{FFFD}' } else { char });
5979            cursor = next;
5980        }
5981    }
5982    output
5983}
5984
/// Appends `line` to `output`, writing a `\n` first unless `output` is still
/// empty.
fn push_line(output: &mut String, line: &str) {
    let needs_separator = !output.is_empty();
    if needs_separator {
        output.push('\n');
    }
    output.push_str(line);
}
5991
5992fn ensure_line_separator(output: &mut String) {
5993    if !output.is_empty() && !ends_with_line_ending(output) {
5994        output.push('\n');
5995    }
5996}
5997
/// Whether the final byte of `input` is `\n` or `\r`.
fn ends_with_line_ending(input: &str) -> bool {
    matches!(input.as_bytes().last(), Some(b'\n' | b'\r'))
}
6001
6002fn flush_text(nodes: &mut Vec<Inline>, text: &mut String, text_start: usize, end: usize) {
6003    if !text.is_empty() {
6004        nodes.push(Inline::Text(Text {
6005            meta: NodeMeta::new(Some(Span::new(text_start, end))),
6006            value: core::mem::take(text),
6007        }));
6008    }
6009}
6010
6011fn gfm_link_label_preserves_url_dot_escape(
6012    text: &str,
6013    escaped: char,
6014    options: &SyntaxOptions,
6015    context: InlineContext,
6016) -> bool {
6017    escaped == '.'
6018        && !context.allow_links
6019        && options.constructs.gfm_autolink_literal
6020        && (text.starts_with("www.") || text.starts_with("http://") || text.starts_with("https://"))
6021}
6022
/// Decodes the character that starts at byte offset `index`, returning the
/// offset just past it together with the character, or `None` when `index` is
/// the end of `input`.
fn next_char(input: &str, index: usize) -> Option<(usize, char)> {
    input[index..]
        .chars()
        .next()
        .map(|decoded| (index + decoded.len_utf8(), decoded))
}
6027
6028/// A CommonMark "Unicode punctuation character" for emphasis/strong flanking:
6029/// ASCII punctuation plus the non-ASCII Unicode `P*`/`S*` categories. Only the
6030/// flanking classification needs the Unicode set; escape/label logic stays
6031/// ASCII-only via `char::is_ascii_punctuation`.
6032fn is_flanking_punctuation(value: char) -> bool {
6033    value.is_ascii_punctuation() || crate::unicode_punctuation::is_unicode_punctuation(value)
6034}
6035
/// Reduces a reference label to the identifier used for matching. CommonMark
/// compares the RAW label source (backslash escapes and entities are left
/// untouched) after trimming, collapsing each internal whitespace run to one
/// space, and Unicode case-folding (`to_uppercase()` followed by
/// `to_lowercase()`). Consequently `[foo\!]` and `[foo!]` are different
/// labels, as are `[&copy;]` and `[©]`.
///
/// The serializer's `normalize_reference_label` forwards to this function so
/// its Shortcut/Collapsed omission oracle cannot drift from this matcher.
pub(crate) fn normalize_label(label: &str) -> String {
    // Full Unicode casefolding turns capital sharp S (ẞ, U+1E9E) into "ss",
    // but `to_uppercase` leaves it alone because it already is uppercase.
    // Without this substitution `[ẞ]` would fail to match a `[SS]: …`
    // definition (links 540). It is the one character where
    // `to_uppercase().to_lowercase()` and the full casefold disagree in a way
    // that matters for label matching.
    let sharp_s_folded = label.replace('ẞ', "ss");

    let mut collapsed = String::with_capacity(sharp_s_folded.len());
    for word in sharp_s_folded.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.to_uppercase().to_lowercase()
}
6058
6059fn definition_exists(definitions: &[String], label: &str) -> bool {
6060    if label.is_empty() || !reference_label_is_within_limit(label) {
6061        return false;
6062    }
6063
6064    let identifier = normalize_label(label);
6065    definitions
6066        .iter()
6067        .any(|definition| definition == &identifier)
6068}
6069
6070fn reference_label_is_within_limit(label: &str) -> bool {
6071    label.chars().take(REFERENCE_LABEL_MAX_CHARS + 1).count() <= REFERENCE_LABEL_MAX_CHARS
6072}
6073
6074fn trim_up_to_three_spaces(input: &str) -> Option<&str> {
6075    let (columns, bytes) = leading_indent(input);
6076    if columns <= 3 {
6077        Some(&input[bytes..])
6078    } else {
6079        None
6080    }
6081}
6082
6083fn fence_start(input: &str) -> Option<(FenceMarker, usize)> {
6084    let marker = match input.as_bytes().first()? {
6085        b'`' => FenceMarker::Backtick,
6086        b'~' => FenceMarker::Tilde,
6087        _ => return None,
6088    };
6089    let byte = match marker {
6090        FenceMarker::Backtick => b'`',
6091        FenceMarker::Tilde => b'~',
6092    };
6093    let length = input
6094        .as_bytes()
6095        .iter()
6096        .take_while(|item| **item == byte)
6097        .count();
6098    if length >= 3 {
6099        Some((marker, length))
6100    } else {
6101        None
6102    }
6103}
6104
6105fn fence_close(input: &str, marker: FenceMarker, length: usize) -> bool {
6106    let byte = match marker {
6107        FenceMarker::Backtick => b'`',
6108        FenceMarker::Tilde => b'~',
6109    };
6110    let count = input
6111        .as_bytes()
6112        .iter()
6113        .take_while(|item| **item == byte)
6114        .count();
6115    count >= length && input[count..].trim().is_empty()
6116}
6117
/// Removes an optional closing `#` sequence from heading content.
///
/// Trailing whitespace is always trimmed. A trailing run of `#` is then
/// removed only when it is the entire content (giving `""`) or when a space
/// or tab sits directly before it; otherwise the hashes are part of the
/// content and stay.
fn trim_closing_hashes(input: &str) -> &str {
    let content = input.trim_end();
    let without_hashes = content.trim_end_matches('#');

    if without_hashes.len() == content.len() {
        // No trailing `#` at all.
        return content;
    }
    if without_hashes.is_empty() {
        // Nothing but hashes.
        return "";
    }

    let separated = without_hashes.ends_with(|last: char| last == ' ' || last == '\t');
    if separated {
        without_hashes.trim_end()
    } else {
        content
    }
}
6135
6136fn list_marker_info(input: &str) -> Option<ListMarkerInfo<'_>> {
6137    let trimmed = trim_up_to_three_spaces(input)?;
6138    let indent = input.len() - trimmed.len();
6139    let bytes = trimmed.as_bytes();
6140    match bytes.first()? {
6141        b'-' | b'*' | b'+' if is_list_padding_byte(bytes.get(1).copied()) => {
6142            let delimiter = match bytes[0] {
6143                b'-' => ListDelimiter::Dash,
6144                b'*' => ListDelimiter::Asterisk,
6145                _ => ListDelimiter::Plus,
6146            };
6147            let (content_offset, content_indent) = list_content_offset(trimmed, 1, indent);
6148            Some(ListMarkerInfo {
6149                ordered: false,
6150                start: None,
6151                delimiter,
6152                indent,
6153                marker_len: 1,
6154                content_indent,
6155                content: &trimmed[content_offset..],
6156            })
6157        }
6158        byte if byte.is_ascii_digit() => {
6159            let mut end = 0;
6160            while bytes.get(end).is_some_and(|byte| byte.is_ascii_digit()) {
6161                end += 1;
6162            }
6163            if end > 9 {
6164                return None;
6165            }
6166            let delimiter = match bytes.get(end)? {
6167                b'.' => ListDelimiter::Period,
6168                b')' => ListDelimiter::Paren,
6169                _ => return None,
6170            };
6171            if !is_list_padding_byte(bytes.get(end + 1).copied()) {
6172                return None;
6173            }
6174            let start = trimmed[..end].parse().ok()?;
6175            let marker_len = end + 1;
6176            let (content_offset, content_indent) = list_content_offset(trimmed, marker_len, indent);
6177            Some(ListMarkerInfo {
6178                ordered: true,
6179                start: Some(start),
6180                delimiter,
6181                indent,
6182                marker_len,
6183                content_indent,
6184                content: &trimmed[content_offset..],
6185            })
6186        }
6187        _ => None,
6188    }
6189}
6190
/// Locates where a list item's content begins.
///
/// `input` starts at the marker, `marker_len` is the marker's byte length and
/// `indent` the column at which the marker starts. Returns
/// `(byte offset of the content within input, content indent column)`.
///
/// The spaces/tabs after the marker count as padding (tabs advance to the
/// next multiple of four). One to four columns of padding are consumed in
/// full; zero or more than four fall back to a single byte and a single
/// column past the marker.
fn list_content_offset(input: &str, marker_len: usize, indent: usize) -> (usize, usize) {
    let bytes = input.as_bytes();
    let marker_end_column = indent + marker_len;
    let minimal_indent = marker_end_column + 1;

    // Nothing at all after the marker.
    if marker_len >= bytes.len() {
        return (marker_len, minimal_indent);
    }

    let mut cursor = marker_len;
    let mut column = marker_end_column;
    for byte in &bytes[marker_len..] {
        column = match byte {
            b' ' => column + 1,
            b'\t' => column + 4 - column % 4,
            _ => break,
        };
        cursor += 1;
    }

    // Marker followed only by whitespace: an empty item whose first line is
    // blank. CommonMark §5.2 pins its content indent to marker width + 1 no
    // matter how much trailing whitespace there is, so a following line
    // indented one column past the marker still joins the item.
    if cursor == bytes.len() {
        return (cursor, minimal_indent);
    }

    match column - marker_end_column {
        1..=4 => (cursor, column),
        _ => (marker_len + 1, minimal_indent),
    }
}
6221
6222fn list_marker_first_content<'a>(input: &'a str, marker: ListMarkerInfo<'a>) -> Cow<'a, str> {
6223    let Some(trimmed) = trim_up_to_three_spaces(input) else {
6224        return Cow::Borrowed(marker.content);
6225    };
6226    let after_marker = &trimmed[marker.marker_len..];
6227    if after_marker.starts_with('\t') {
6228        strip_leading_indent_columns_from(after_marker, 1, marker.indent + marker.marker_len)
6229    } else {
6230        Cow::Borrowed(marker.content)
6231    }
6232}
6233
/// Whether `byte` may directly follow a list marker: a space, a tab, or
/// nothing at all (`None`, i.e. end of line).
fn is_list_padding_byte(byte: Option<u8>) -> bool {
    byte.map_or(true, |value| value == b' ' || value == b'\t')
}
6237
6238fn same_list_marker(left: ListMarkerInfo<'_>, right: ListMarkerInfo<'_>) -> bool {
6239    // CommonMark §5.3: list items belong to the same list when they share a
6240    // bullet character or ordered delimiter. Indentation does not enter into
6241    // it — `- foo\n - bar\n  - baz` is one four-item bullet list, not three.
6242    left.ordered == right.ordered && left.delimiter == right.delimiter
6243}
6244
6245/// Whether `input` begins a *sibling* item of the current list item.
6246///
6247/// A same-delimiter marker is a sibling only when it is not indented far enough
6248/// to nest inside the current item — i.e. its indent is less than the item's
6249/// `content_indent`. A marker indented at or beyond the content start belongs to
6250/// a sublist within the item and is consumed as item content instead.
6251fn sibling_list_marker_at_line(
6252    input: &str,
6253    first_marker: ListMarkerInfo<'_>,
6254    content_indent: usize,
6255) -> bool {
6256    list_marker_info(input).is_some_and(|candidate| {
6257        same_list_marker(first_marker, candidate) && candidate.indent < content_indent
6258    })
6259}
6260
6261/// Whether `input` begins a list marker belonging to the same list as
6262/// `first_marker` (same ordered/unordered kind and delimiter). Used to tell a
6263/// marker that merely continues the current list apart from one that, by
6264/// changing the marker type, starts a new list (CommonMark §5.3).
6265fn same_list_marker_line(input: &str, first_marker: ListMarkerInfo<'_>) -> bool {
6266    list_marker_info(input).is_some_and(|candidate| same_list_marker(first_marker, candidate))
6267}
6268
6269fn next_nonblank_line(lines: &[Line<'_>], mut index: usize) -> usize {
6270    while index < lines.len() && lines[index].text.trim().is_empty() {
6271        index += 1;
6272    }
6273    index
6274}
6275
/// Measures the leading run of spaces and tabs in `input`.
///
/// Returns `(columns, bytes)`: the visual width of the run, with each tab
/// advancing to the next multiple of four, and the number of bytes it covers.
fn leading_indent(input: &str) -> (usize, usize) {
    let mut columns = 0usize;
    let mut consumed = 0usize;
    for byte in input.bytes() {
        columns = match byte {
            b' ' => columns + 1,
            b'\t' => columns + 4 - columns % 4,
            _ => break,
        };
        consumed += 1;
    }
    (columns, consumed)
}
6289
6290fn leading_indent_columns(input: &str) -> usize {
6291    leading_indent(input).0
6292}
6293
6294/// Removes up to `max_columns` columns of leading whitespace, stopping at the
6295/// first non-space/tab byte (tabs advance to the next 4-column tab stop). A tab
6296/// that straddles the column budget is PARTIALLY consumed: the columns beyond the
6297/// budget are re-emitted as spaces (CommonMark tab-expansion of indentation), so
6298/// the result may be an owned `String`. Whitespace already at/over the budget
6299/// (and any literal tab whose start sits at the budget) is returned verbatim.
6300fn strip_leading_indent_columns(input: &str, max_columns: usize) -> Cow<'_, str> {
6301    strip_leading_indent_columns_from(input, max_columns, 0)
6302}
6303
/// Strips up to `max_columns` columns of leading spaces/tabs from `input`,
/// where the first byte of `input` sits at column `start_column` (so tab stops
/// are computed relative to that column).
///
/// The result borrows from `input` except when a tab straddles the end of the
/// budget. That tab is split: the columns it covers beyond the budget are
/// emitted as spaces, any further leading spaces/tabs are expanded to spaces
/// as well, and the remainder of `input` is appended.
fn strip_leading_indent_columns_from(
    input: &str,
    max_columns: usize,
    start_column: usize,
) -> Cow<'_, str> {
    let bytes = input.as_bytes();
    let budget_end = start_column + max_columns;

    // Consume whole whitespace bytes for as long as they fit in the budget.
    let mut column = start_column;
    let mut index = 0;
    while index < bytes.len() {
        let width = match bytes[index] {
            b' ' => 1,
            b'\t' => 4 - column % 4,
            _ => break,
        };
        if column + width > budget_end {
            break;
        }
        column += width;
        index += 1;
    }

    // The loop stopped at the end of input, at a non-whitespace byte, or at a
    // whitespace byte that would overshoot. Only an overshooting tab that
    // starts inside the budget needs splitting; the rest is a plain borrow.
    let straddling_tab = bytes.get(index) == Some(&b'\t') && column < budget_end;
    if !straddling_tab {
        return Cow::Borrowed(&input[index..]);
    }

    let tab_end = column + (4 - column % 4);
    let residual = tab_end - budget_end;
    let mut owned = String::with_capacity(residual + input.len() - (index + 1));
    owned.extend((0..residual).map(|_| ' '));

    // Expand the remaining leading whitespace to spaces so later tabs keep
    // their visual width.
    let mut rest_column = tab_end;
    let mut rest_index = index + 1;
    for byte in &bytes[index + 1..] {
        let width = match byte {
            b' ' => 1,
            b'\t' => 4 - rest_column % 4,
            _ => break,
        };
        owned.extend((0..width).map(|_| ' '));
        rest_column += width;
        rest_index += 1;
    }
    owned.push_str(&input[rest_index..]);
    Cow::Owned(owned)
}
6355
6356fn strip_list_continuation(input: &str, content_indent: usize, list_indent: usize) -> Cow<'_, str> {
6357    let (indent_columns, indent_bytes) = leading_indent(input);
6358    if indent_columns >= content_indent {
6359        // Remove exactly `content_indent` columns. A tab straddling that budget
6360        // is split: the columns past the budget survive as spaces (CommonMark
6361        // tab expansion of list-item indentation), so a `\t`-only line inside a
6362        // 2-column item keeps the residual two spaces instead of vanishing.
6363        strip_leading_indent_columns(input, content_indent)
6364    } else if indent_columns > list_indent {
6365        Cow::Borrowed(&input[indent_bytes..])
6366    } else {
6367        Cow::Borrowed(trim_ascii_start(input))
6368    }
6369}
6370
6371fn take_task_marker_from_children(children: &mut [Block]) -> Option<bool> {
6372    let Some(Block::Paragraph(paragraph)) = children.first_mut() else {
6373        return None;
6374    };
6375    take_task_marker_from_inlines(&mut paragraph.children)
6376}
6377
6378fn take_task_marker_from_inlines(inlines: &mut Vec<Inline>) -> Option<bool> {
6379    let Some(Inline::Text(text)) = inlines.first() else {
6380        return None;
6381    };
6382    let first = text.value.clone();
6383
6384    if let Some((checked, consumed)) = task_marker_inline_prefix(&first) {
6385        if !first[consumed..].is_empty() || inlines_have_content_after(inlines, 1) {
6386            remove_text_prefix(inlines, consumed);
6387            return Some(checked);
6388        }
6389    }
6390
6391    if let Some(checked) = task_marker_at_text_end(&first) {
6392        if inlines
6393            .get(1)
6394            .is_some_and(|inline| matches!(inline, Inline::SoftBreak(_)))
6395            && inlines_have_content_after(inlines, 2)
6396        {
6397            inlines.remove(1);
6398            inlines.remove(0);
6399            return Some(checked);
6400        }
6401    }
6402
6403    if task_marker_split_open(&first)
6404        && inlines
6405            .get(1)
6406            .is_some_and(|inline| matches!(inline, Inline::SoftBreak(_)))
6407    {
6408        let Some(Inline::Text(next)) = inlines.get(2) else {
6409            return None;
6410        };
6411        if let Some((checked, consumed)) = task_marker_split_close_prefix(&next.value) {
6412            if !next.value[consumed..].is_empty() || inlines_have_content_after(inlines, 3) {
6413                inlines.remove(1);
6414                inlines.remove(0);
6415                remove_text_prefix(inlines, consumed);
6416                return Some(checked);
6417            }
6418        }
6419    }
6420
6421    None
6422}
6423
6424fn task_marker_inline_prefix(input: &str) -> Option<(bool, usize)> {
6425    let start = leading_trim_bytes(input);
6426    let rest = &input[start..];
6427    let checked = task_marker_checked(rest)?;
6428    let after_marker = start + 3;
6429    match input.as_bytes().get(after_marker) {
6430        Some(b' ' | b'\t') => Some((checked, after_marker + 1)),
6431        _ => None,
6432    }
6433}
6434
6435fn task_marker_at_text_end(input: &str) -> Option<bool> {
6436    let start = leading_trim_bytes(input);
6437    let rest = &input[start..];
6438    let checked = task_marker_checked(rest)?;
6439    if rest.len() == 3 {
6440        Some(checked)
6441    } else {
6442        None
6443    }
6444}
6445
6446fn task_marker_split_open(input: &str) -> bool {
6447    let start = leading_trim_bytes(input);
6448    input[start..] == *"["
6449}
6450
/// Matches the closing part of a task marker at the very start of `input`:
/// `]` for an unchecked box, or `x]` / `X]` for a checked one, followed in
/// either case by a space or tab. Returns the checked state and the number of
/// bytes matched, separator included.
fn task_marker_split_close_prefix(input: &str) -> Option<(bool, usize)> {
    let is_gap = |byte: u8| byte == b' ' || byte == b'\t';
    match input.as_bytes() {
        [b']', gap, ..] if is_gap(*gap) => Some((false, 2)),
        [b'x' | b'X', b']', gap, ..] if is_gap(*gap) => Some((true, 3)),
        _ => None,
    }
}
6459
/// Reads a task marker at the very start of `input`: `[ ]` is unchecked
/// (`Some(false)`), `[x]` and `[X]` are checked (`Some(true)`), anything else
/// is `None`.
fn task_marker_checked(input: &str) -> Option<bool> {
    match input.as_bytes() {
        [b'[', b' ', b']', ..] => Some(false),
        [b'[', b'x' | b'X', b']', ..] => Some(true),
        _ => None,
    }
}
6469
6470fn remove_text_prefix(inlines: &mut Vec<Inline>, consumed: usize) {
6471    if let Some(Inline::Text(text)) = inlines.first_mut() {
6472        text.value = text.value[consumed..].into();
6473        if text.value.is_empty() {
6474            inlines.remove(0);
6475        }
6476    }
6477}
6478
6479fn inlines_have_content_after(inlines: &[Inline], start: usize) -> bool {
6480    inlines.iter().skip(start).any(|inline| match inline {
6481        Inline::Text(text) => !text.value.is_empty(),
6482        Inline::SoftBreak(_) | Inline::LineBreak(_) => false,
6483        _ => true,
6484    })
6485}
6486
6487fn update_list_item_fence(line: &str, open_fence: &mut Option<(FenceMarker, usize)>) {
6488    let Some(trimmed) = trim_up_to_three_spaces(line) else {
6489        return;
6490    };
6491    if let Some((marker, length)) = *open_fence {
6492        if fence_close(trimmed, marker, length) {
6493            *open_fence = None;
6494        }
6495        return;
6496    }
6497    if let Some((marker, length)) = fence_start(trimmed) {
6498        *open_fence = Some((marker, length));
6499    }
6500}
6501
/// Drops leading spaces and tabs — and only those — from `input`.
fn trim_ascii_start(input: &str) -> &str {
    let skipped = input
        .bytes()
        .take_while(|byte| matches!(byte, b' ' | b'\t'))
        .count();
    &input[skipped..]
}
6505
6506fn leading_trim_bytes(input: &str) -> usize {
6507    input.len() - trim_ascii_start(input).len()
6508}
6509
6510fn parse_table_delimiter(input: &str, spoiler: bool) -> Option<Vec<TableAlignment>> {
6511    let cells = split_table_row(input, spoiler);
6512    if cells.is_empty() {
6513        return None;
6514    }
6515    let mut alignments = Vec::new();
6516    for cell in cells {
6517        alignments.push(table_delimiter_alignment(cell.trim())?);
6518    }
6519    Some(alignments)
6520}
6521
6522// A delimiter cell is `:?` `-`+ `:?` once trimmed: colons only at the
6523// boundaries, the dashes contiguous, no interior space or colon.
6524fn table_delimiter_alignment(cell: &str) -> Option<TableAlignment> {
6525    let bytes = cell.as_bytes();
6526    let mut cursor = 0;
6527    let left = bytes.first() == Some(&b':');
6528    if left {
6529        cursor += 1;
6530    }
6531    let dash_start = cursor;
6532    while bytes.get(cursor) == Some(&b'-') {
6533        cursor += 1;
6534    }
6535    if cursor == dash_start {
6536        return None;
6537    }
6538    let right = bytes.get(cursor) == Some(&b':');
6539    if right {
6540        cursor += 1;
6541    }
6542    if cursor != bytes.len() {
6543        return None;
6544    }
6545    Some(match (left, right) {
6546        (true, true) => TableAlignment::Center,
6547        (true, false) => TableAlignment::Left,
6548        (false, true) => TableAlignment::Right,
6549        (false, false) => TableAlignment::None,
6550    })
6551}
6552
6553/// Normalizes a table line's leading indentation: when indented code is enabled
6554/// a four-space indent would start a code block, so up to three leading spaces
6555/// are trimmed and four or more disqualifies the line.
6556fn table_indent_line(input: &str, indented_code: bool) -> Option<&str> {
6557    if indented_code {
6558        trim_up_to_three_spaces(input)
6559    } else {
6560        Some(input)
6561    }
6562}
6563
/// Whether the backtick run of `length` starting at `start` is closed later in
/// `input` by another run of exactly `length` backticks.
///
/// The table row scanner keeps splitting on unescaped pipes regardless; the
/// code-span state this feeds exists only so that extension syntax such as
/// spoilers is not recognized inside a code span.
fn backtick_run_has_close(input: &str, start: usize, length: usize) -> bool {
    let bytes = input.as_bytes();
    let mut cursor = start + length;
    while let Some(&byte) = bytes.get(cursor) {
        if byte != b'`' {
            cursor += 1;
            continue;
        }
        let run = bytes[cursor..]
            .iter()
            .take_while(|candidate| **candidate == b'`')
            .count();
        if run == length {
            return true;
        }
        // A run of a different length cannot close; skip it whole.
        cursor += run;
    }
    false
}
6588
/// Inspects a backslash run that starts at `cursor`. When the run is
/// immediately followed by `|`, returns the byte offset of that pipe and
/// whether the run has odd length (meaning the pipe is escaped). Returns
/// `None` when there is no backslash at `cursor` or no pipe after the run.
fn table_backslash_pipe_run(input: &str, cursor: usize) -> Option<(usize, bool)> {
    let tail = input.as_bytes().get(cursor..)?;
    let run = tail.iter().take_while(|byte| **byte == b'\\').count();
    if run == 0 || tail.get(run) != Some(&b'|') {
        return None;
    }
    Some((cursor + run, run % 2 == 1))
}
6600
6601fn split_table_row(input: &str, spoiler: bool) -> Vec<String> {
6602    let trimmed = input.trim();
6603    let mut cells = Vec::new();
6604    let mut cell = String::new();
6605    let mut cursor = 0;
6606    let mut code_fence = None;
6607    let mut spoiler_open = false;
6608    // Byte offset just past the most recent genuine cell-delimiter pipe. When the
6609    // scan ends with only whitespace after it, that pipe was a trailing border and
6610    // the empty leftover cell is dropped (rather than blindly trusting that the
6611    // line ends with `|`, which mis-fires on a spoiler-close `||` or a code-span
6612    // pipe — see tbl-4).
6613    let mut trailing_delimiter_end = None;
6614
6615    while cursor < trimmed.len() {
6616        let (next, char) = next_char(trimmed, cursor).expect("valid UTF-8 byte index");
6617        // GitHub/cmark-gfm treats an odd backslash run before `|` as a literal
6618        // cell-content pipe, but an even run leaves the pipe as a delimiter. Keep
6619        // the original run before an even delimiter so the inline parser resolves
6620        // the visible backslashes correctly.
6621        if char == '\\' {
6622            if let Some((pipe, escaped)) = table_backslash_pipe_run(trimmed, cursor) {
6623                if escaped {
6624                    for _ in 0..pipe - cursor - 1 {
6625                        cell.push('\\');
6626                    }
6627                    cell.push('|');
6628                    cursor = pipe + 1;
6629                } else {
6630                    for _ in 0..pipe - cursor {
6631                        cell.push('\\');
6632                    }
6633                    cursor = pipe;
6634                }
6635                continue;
6636            }
6637        }
6638        // Backticks are never escapable, so a preceding backslash does not block a
6639        // code-span boundary (a `\` directly before a closing backtick is content,
6640        // not an escape — see tbl-3).
6641        if char == '`' {
6642            let length = trimmed[cursor..]
6643                .as_bytes()
6644                .iter()
6645                .take_while(|byte| **byte == b'`')
6646                .count();
6647            if code_fence == Some(length) {
6648                code_fence = None;
6649            } else if code_fence.is_none() && backtick_run_has_close(trimmed, cursor, length) {
6650                code_fence = Some(length);
6651            }
6652            cell.push_str(&trimmed[cursor..cursor + length]);
6653            cursor += length;
6654            continue;
6655        }
6656
6657        if spoiler
6658            && char == '|'
6659            && trimmed.as_bytes().get(cursor + 1) == Some(&b'|')
6660            && code_fence.is_some()
6661        {
6662            cell.push_str("||");
6663            cursor += 2;
6664            continue;
6665        }
6666
6667        if spoiler
6668            && char == '|'
6669            && trimmed.as_bytes().get(cursor + 1) == Some(&b'|')
6670            && code_fence.is_none()
6671            && !is_escaped_at(trimmed, cursor)
6672        {
6673            let closes_spoiler =
6674                spoiler_open && trimmed.as_bytes().get(cursor.wrapping_sub(1)) != Some(&b'|');
6675            let opens_spoiler = !spoiler_open
6676                && trimmed.as_bytes().get(cursor + 2) != Some(&b'|')
6677                && find_spoiler_close(trimmed, cursor + 2).is_some();
6678            if closes_spoiler || opens_spoiler {
6679                spoiler_open = opens_spoiler;
6680                cell.push_str("||");
6681                cursor += 2;
6682                continue;
6683            }
6684        }
6685
6686        if char == '|' && !spoiler_open && !is_escaped_at(trimmed, cursor) {
6687            cells.push(core::mem::take(&mut cell));
6688            // A delimiter ends the cell; spoiler state never spans a cell boundary.
6689            spoiler_open = false;
6690            trailing_delimiter_end = Some(next);
6691        } else {
6692            cell.push(char);
6693        }
6694        cursor = next;
6695    }
6696    cells.push(cell);
6697
6698    if trimmed.starts_with('|') {
6699        cells.remove(0);
6700    }
6701    // Drop the empty cell created by a trailing border pipe: the last genuine
6702    // delimiter must sit at the very end (only whitespace after it).
6703    if let Some(end) = trailing_delimiter_end {
6704        if trimmed[end..].trim().is_empty() {
6705            cells.pop();
6706        }
6707    }
6708    cells
6709}
6710
6711fn table_can_start(lines: &[Line<'_>], index: usize, options: &SyntaxOptions) -> bool {
6712    if !options.constructs.gfm_table || index + 1 >= lines.len() {
6713        return false;
6714    }
6715    table_can_start_source(
6716        lines[index].text,
6717        lines[index + 1].text,
6718        options.constructs.indented_code,
6719        options.constructs.spoiler,
6720    )
6721}
6722
6723pub(crate) fn gfm_table_can_start_source(header: &str, delimiter: &str) -> bool {
6724    table_can_start_source(header, delimiter, true, false)
6725}
6726
6727fn table_can_start_source(
6728    header: &str,
6729    delimiter: &str,
6730    indented_code: bool,
6731    spoiler: bool,
6732) -> bool {
6733    let Some(delimiter) = table_indent_line(delimiter, indented_code) else {
6734        return false;
6735    };
6736    if list_marker_info(delimiter).is_some() {
6737        return false;
6738    }
6739    if !table_has_separator(header, delimiter, spoiler) {
6740        return false;
6741    }
6742    let Some(alignments) = parse_table_delimiter(delimiter, spoiler) else {
6743        return false;
6744    };
6745    split_table_row(header, spoiler).len() == alignments.len()
6746}
6747
6748fn table_has_separator(header: &str, delimiter: &str, spoiler: bool) -> bool {
6749    // GFM makes leading/trailing pipes optional, so `parse_table_delimiter` plus
6750    // the header/alignment column-count check usually suffice. The one exception
6751    // is a single resolved column with no disambiguating syntax: `a\n-\nb` has
6752    // matching one-column shapes yet no pipe and no alignment colon, so it is a
6753    // loose paragraph/setext, not a table. A single column still forms a table
6754    // when a pipe appears in the header/delimiter or the delimiter carries an
6755    // explicit alignment colon (`a\n-:`, `a\n:-:`, …).
6756    let Some(alignments) = parse_table_delimiter(delimiter, spoiler) else {
6757        return true;
6758    };
6759    if alignments.len() == 1 {
6760        return contains_unescaped_pipe(header, spoiler)
6761            || contains_unescaped_pipe(delimiter, spoiler)
6762            || delimiter.contains(':');
6763    }
6764    true
6765}
6766
// Reports whether `input` contains a `|` that counts as a table-cell
// delimiter: one that is not backslash-escaped and (when `spoiler` is on) is
// not consumed as part of a `||` spoiler marker or enclosed by an open spoiler.
//
// Used by `table_has_separator` for the single-column disambiguation, and
// still used by `block_quote_table_body_row` to detect a table row appearing as
// a block-quote continuation line (which DOES require a pipe).
fn contains_unescaped_pipe(input: &str, spoiler: bool) -> bool {
    // Byte offset of the char currently being examined.
    let mut cursor = 0;
    // `Some(n)` while inside a code span that was opened by a run of `n` backticks.
    let mut code_fence = None;
    // True between an opening and a closing `||` spoiler marker.
    let mut spoiler_open = false;
    while cursor < input.len() {
        let (next, char) = next_char(input, cursor).expect("valid UTF-8 byte index");
        if char == '\\' {
            // NOTE(review): `table_backslash_pipe_run` is defined elsewhere; it
            // presumably locates the `|` that ends a backslash run and reports
            // whether that pipe is escaped — confirm against its definition.
            // An escaped pipe is skipped outright; an unescaped one is
            // re-examined on the next iteration.
            if let Some((pipe, escaped)) = table_backslash_pipe_run(input, cursor) {
                cursor = if escaped { pipe + 1 } else { pipe };
                continue;
            }
        }
        // Backticks are never escapable; a preceding backslash is code-span content.
        if char == '`' {
            // Length of the backtick run starting here.
            let length = input[cursor..]
                .as_bytes()
                .iter()
                .take_while(|byte| **byte == b'`')
                .count();
            // A run matching the opener closes the span; a run seen outside any
            // span opens one; a mismatched run inside a span changes nothing.
            if code_fence == Some(length) {
                code_fence = None;
            } else if code_fence.is_none() {
                code_fence = Some(length);
            }
            cursor += length;
            continue;
        }
        // With spoilers enabled, a `||` pair inside a code span is stepped over
        // as a unit and never reported as a delimiter.
        if spoiler
            && char == '|'
            && input.as_bytes().get(cursor + 1) == Some(&b'|')
            && code_fence.is_some()
        {
            cursor += 2;
            continue;
        }
        // With spoilers enabled, an unescaped `||` outside code may toggle the
        // spoiler state instead of acting as two delimiters.
        if spoiler
            && char == '|'
            && input.as_bytes().get(cursor + 1) == Some(&b'|')
            && code_fence.is_none()
            && !is_escaped_at(input, cursor)
        {
            // Closing requires an open spoiler and a preceding byte that is not
            // `|`. At `cursor == 0` the wrapping subtraction yields an
            // out-of-range index, so `get` returns `None` and the test passes.
            let closes_spoiler =
                spoiler_open && input.as_bytes().get(cursor.wrapping_sub(1)) != Some(&b'|');
            // Opening requires no open spoiler, a following byte that is not
            // `|`, and a result from `find_spoiler_close` past the marker.
            let opens_spoiler = !spoiler_open
                && input.as_bytes().get(cursor + 2) != Some(&b'|')
                && find_spoiler_close(input, cursor + 2).is_some();
            if closes_spoiler || opens_spoiler {
                // `opens_spoiler` is necessarily false when closing, so this
                // assignment both opens and closes the spoiler.
                spoiler_open = opens_spoiler;
                cursor += 2;
                continue;
            }
        }
        // Any remaining unescaped pipe outside an open spoiler is a delimiter.
        // Code-span state is not consulted here, so a pipe inside a code span
        // still counts.
        if char == '|' && !spoiler_open && !is_escaped_at(input, cursor) {
            return true;
        }
        cursor = next;
    }
    false
}
6828
6829fn likely_block_start(input: &str, options: &SyntaxOptions) -> bool {
6830    // Block-structure markers (ATX, fences, thematic breaks, list markers, math
6831    // fences, directives, …) only begin a block when indented at most 3 columns.
6832    // At >=4 columns the line is indented code, which never interrupts a
6833    // paragraph, so no marker test should fire.
6834    let Some(trimmed) = trim_up_to_three_spaces(input) else {
6835        return false;
6836    };
6837    trimmed.starts_with('#')
6838        || trimmed.starts_with('>')
6839        || trimmed.starts_with("```")
6840        || trimmed.starts_with("~~~")
6841        || list_marker_can_interrupt_paragraph(input)
6842        || parse_thematic_break(Line {
6843            text: input,
6844            eol: "",
6845            start: 0,
6846            end: input.len(),
6847            end_with_eol: input.len(),
6848            lazy: false,
6849        })
6850        .is_some()
6851        || (options.constructs.html_block && line_starts_interrupting_html_block(input))
6852        || (options.constructs.math_block && math_block_fence_length(trimmed).is_some())
6853        || (options.constructs.directive_container && trimmed.starts_with(":::"))
6854        || (options.constructs.directive_leaf && trimmed.starts_with("::"))
6855        || (options.constructs.footnote_definition && line_starts_footnote_definition(trimmed))
6856}
6857
6858// A GFM footnote definition `[^label]:` is a block boundary: it interrupts a
6859// paragraph and ends a prior footnote's lazy continuation.
6860fn line_starts_footnote_definition(trimmed: &str) -> bool {
6861    trimmed.starts_with("[^")
6862        && find_footnote_definition_label_end(trimmed)
6863            .is_some_and(|close| is_footnote_label(&trimmed[2..close]))
6864}
6865
6866fn list_marker_can_interrupt_paragraph(input: &str) -> bool {
6867    list_marker_info(input).is_some_and(|marker| {
6868        // An empty list item never interrupts a paragraph (CommonMark §5.3):
6869        // `foo\n*` is a single paragraph, not a paragraph plus an empty list.
6870        !marker.content.trim().is_empty() && (!marker.ordered || marker.start == Some(1))
6871    })
6872}
6873
6874// GFM table-body termination is stricter than paragraph interruption: an open
6875// table also ends on a list marker with EMPTY content (`-`, `*`, `1.`), which
6876// `likely_block_start` deliberately ignores for paragraphs. Used only by the
6877// table body loop; `likely_block_start` itself is left untouched.
6878fn table_body_line_ends_table(line: &str, options: &SyntaxOptions) -> bool {
6879    likely_block_start(line, options)
6880        || list_marker_info(line).is_some()
6881        || (options.constructs.html_block && line_starts_html_block(line))
6882}
6883
6884fn line_starts_interrupting_html_block(input: &str) -> bool {
6885    match trim_up_to_three_spaces(input).and_then(html_block_start) {
6886        Some(HtmlBlockKind::UntilBlank) | None => false,
6887        Some(_) => true,
6888    }
6889}
6890
// Returns the byte offset just past the first `>` found at or after `index`,
// or `None` when no `>` follows.
fn parse_autolink_end(input: &str, index: usize) -> Option<usize> {
    let close = input[index..].find('>')?;
    Some(index + close + 1)
}
6894
6895fn parse_html_inline(input: &str, index: usize) -> Option<(usize, String)> {
6896    let rest = &input[index..];
6897    if rest.starts_with("<!--") {
6898        let end = rest.find("-->")? + 3;
6899        return Some((index + end, rest[..end].into()));
6900    }
6901    if rest.starts_with("<?") {
6902        let end = rest.find("?>")? + 2;
6903        return Some((index + end, rest[..end].into()));
6904    }
6905    if rest.starts_with("<![CDATA[") {
6906        let end = rest.find("]]>")? + 3;
6907        return Some((index + end, rest[..end].into()));
6908    }
6909    if is_declaration_start(rest) {
6910        let end = rest.find('>')? + 1;
6911        return Some((index + end, rest[..end].into()));
6912    }
6913
6914    let (end, _) = parse_html_tag(input, index)?;
6915    Some((end, input[index..end].into()))
6916}
6917
6918fn parse_html_tag(input: &str, index: usize) -> Option<(usize, &str)> {
6919    let bytes = input.as_bytes();
6920    if bytes.get(index) != Some(&b'<') {
6921        return None;
6922    }
6923
6924    let closing = bytes.get(index + 1) == Some(&b'/');
6925    let name_start = index + if closing { 2 } else { 1 };
6926    let first = *bytes.get(name_start)?;
6927    if !first.is_ascii_alphabetic() {
6928        return None;
6929    }
6930
6931    let mut cursor = name_start + 1;
6932    while bytes.get(cursor).is_some_and(|byte| html_name_byte(*byte)) {
6933        cursor += 1;
6934    }
6935    let name = &input[name_start..cursor];
6936
6937    if closing {
6938        cursor = skip_spaces(input, cursor);
6939        if bytes.get(cursor) == Some(&b'>') {
6940            return Some((cursor + 1, name));
6941        }
6942        return None;
6943    }
6944
6945    let mut needs_space = false;
6946    loop {
6947        let before_spaces = cursor;
6948        cursor = skip_spaces(input, cursor);
6949        let had_space = cursor > before_spaces;
6950        match bytes.get(cursor) {
6951            Some(b'>') => return Some((cursor + 1, name)),
6952            Some(b'/') if bytes.get(cursor + 1) == Some(&b'>') => return Some((cursor + 2, name)),
6953            Some(byte) if had_space && html_attribute_name_start(*byte) => {
6954                cursor += 1;
6955                while bytes
6956                    .get(cursor)
6957                    .is_some_and(|byte| html_attribute_name_byte(*byte))
6958                {
6959                    cursor += 1;
6960                }
6961                let after_name = cursor;
6962                let after_spaces = skip_spaces(input, cursor);
6963                if bytes.get(after_spaces) == Some(&b'=') {
6964                    cursor = skip_spaces(input, after_spaces + 1);
6965                    cursor = parse_html_attribute_value(input, cursor)?;
6966                } else {
6967                    cursor = after_name;
6968                }
6969                needs_space = true;
6970            }
6971            Some(_) if needs_space => return None,
6972            _ => return None,
6973        }
6974    }
6975}
6976
// Parses an HTML attribute value starting at `index` and returns the offset
// just past it.
//
// A value opening with `"` or `'` runs to the next occurrence of the same
// quote (`None` if it never closes). Otherwise the value is a non-empty run of
// bytes that are neither ASCII whitespace nor one of `"`, `'`, `=`, `<`, `>`,
// or a backtick.
fn parse_html_attribute_value(input: &str, index: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    let first = *bytes.get(index)?;

    if first == b'"' || first == b'\'' {
        let body_start = index + 1;
        return bytes[body_start..]
            .iter()
            .position(|&byte| byte == first)
            .map(|close| body_start + close + 1);
    }

    let is_unquoted_byte = |byte: u8| {
        !byte.is_ascii_whitespace() && !matches!(byte, b'"' | b'\'' | b'=' | b'<' | b'>' | b'`')
    };
    let length = bytes[index..]
        .iter()
        .take_while(|&&byte| is_unquoted_byte(byte))
        .count();
    // A zero-length run means the first byte was whitespace or a forbidden byte.
    (length > 0).then_some(index + length)
}
7008
// True for bytes allowed after the first letter of an HTML tag name: ASCII
// letters, ASCII digits, and `-`.
fn html_name_byte(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-')
}
7012
// True for bytes that may begin an HTML attribute name: ASCII letters, `_`,
// and `:`.
fn html_attribute_name_start(byte: u8) -> bool {
    matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'_' | b':')
}
7016
// True for bytes allowed after the first byte of an HTML attribute name: ASCII
// letters, ASCII digits, `_`, `:`, `.`, and `-`.
fn html_attribute_name_byte(byte: u8) -> bool {
    matches!(
        byte,
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b':' | b'.' | b'-'
    )
}
7020
// Advances `index` past any run of space, tab, line feed, or carriage return
// bytes and returns the new offset. An `index` beyond the end of `input` is
// returned unchanged.
fn skip_spaces(input: &str, index: usize) -> usize {
    let Some(tail) = input.as_bytes().get(index..) else {
        return index;
    };
    let run = tail
        .iter()
        .take_while(|byte| matches!(**byte, b' ' | b'\t' | b'\n' | b'\r'))
        .count();
    index + run
}
7031
7032fn is_autolink(input: &str) -> bool {
7033    let inner = &input[1..input.len() - 1];
7034    is_uri_autolink(inner) || is_email_autolink(inner)
7035}
7036
// Reports whether `input` (the text between `<` and `>`) is an absolute URI:
// a 2–32 byte scheme that starts with an ASCII letter and continues with ASCII
// letters, digits, `+`, `.`, or `-`, then `:`, then any run of chars that
// contains no `<`, `>`, control character, or whitespace.
fn is_uri_autolink(input: &str) -> bool {
    let Some((scheme, remainder)) = input.split_once(':') else {
        return false;
    };
    if !(2..=32).contains(&scheme.len()) {
        return false;
    }
    let scheme_is_valid = scheme.bytes().enumerate().all(|(position, byte)| {
        if position == 0 {
            byte.is_ascii_alphabetic()
        } else {
            byte.is_ascii_alphanumeric() || matches!(byte, b'+' | b'.' | b'-')
        }
    });
    scheme_is_valid
        && !remainder.chars().any(|current| {
            matches!(current, '<' | '>') || current.is_control() || current.is_whitespace()
        })
}
7056
7057fn is_email_autolink(input: &str) -> bool {
7058    if input.chars().any(char::is_whitespace) {
7059        return false;
7060    }
7061    let Some(at) = input.find('@') else {
7062        return false;
7063    };
7064    if at == 0 || at + 1 >= input.len() {
7065        return false;
7066    }
7067    // Angle-bracket `<email>` autolinks use the strict CommonMark domain
7068    // grammar but, unlike the GFM bare form, allow a single (dotless) label.
7069    is_email_local_part(&input[..at]) && is_email_domain(&input[at + 1..], 1)
7070}
7071
// GFM literal-autolink dispatch. Tries, in order: `http(s)://` URLs, `www.`
// URLs, extended-protocol (`mailto:`/`xmpp:`) emails, and bare emails. Each
// branch enforces cmark-gfm's per-scheme preceding-character guard and its
// domain/host rules; the trailing trim is shared (`autolink_delim`). The
// returned destination is the synthesized href (a `http://`/`mailto:` prefix
// may be prepended); the caller keeps `input[index..end]` as the visible
// original.
//
// Parameters:
//   * `input` / `index` — the text being scanned and the byte offset at which
//     a literal autolink might start.
//   * `gfm` — enables the `http(s)://`, `www.` and email branches.
//   * `relaxed` — enables the permissive any-scheme `scheme://` branch and
//     switches URL-extent scanning to bracket/brace balancing.
//
// Returns `Some((end, destination))` with `end` the byte offset just past the
// link text, or `None` when nothing at `index` forms a literal autolink.
fn parse_literal_autolink(
    input: &str,
    index: usize,
    gfm: bool,
    relaxed: bool,
) -> Option<(usize, String)> {
    let rest = &input[index..];

    if gfm {
        // `http://` / `https://` URLs. The char before the scheme must not be
        // an ASCII letter (see `literal_scheme_prefix_ok`), so `mmmhttp://…`
        // does not link from `mmmh`.
        if let Some(scheme_len) = rest
            .starts_with("http://")
            .then_some(7)
            .or_else(|| rest.starts_with("https://").then_some(8))
        {
            if !literal_scheme_prefix_ok(input, index) {
                return None;
            }
            let host = &input[index + scheme_len..];
            // A non-empty domain or bracketed IPv6 host is additionally
            // required, so `http://`, `http://#`, `http://$` are not links.
            if !http_literal_host_ok(host) {
                if relaxed {
                    // Let cmark-gfm's relaxed `scheme://` pass decide cases
                    // such as a bare `http://` followed by whitespace.
                    // (Falls through to the `www.`/email checks and then the
                    // relaxed branch below.)
                } else {
                    return None;
                }
            } else {
                // The URL extent is scanned from the very start (after `://`) and the
                // trailing trim runs over the whole URL. Relaxed mode balances
                // brackets/braces so `[abc]`/`{abc}`/IPv6 hosts stay in the URL.
                let end = autolink_url_end(input, index + scheme_len, index + scheme_len, relaxed);
                // Nothing survived after the scheme: not a link.
                if end <= index + scheme_len {
                    return None;
                }
                if literal_autolink_suppressed_by_link_label(input, index, end, relaxed, gfm) {
                    return None;
                }
                // The href is the matched text itself.
                return Some((end, input[index..end].into()));
            }
        }

        // `www.` URLs (synthesize a `http://` href). The prefix match is ASCII
        // case-insensitive; the accepted preceding characters are those listed
        // in `literal_www_prefix_ok` (or start of input).
        if rest
            .as_bytes()
            .get(..4)
            .is_some_and(|prefix| prefix.eq_ignore_ascii_case(b"www."))
        {
            if !literal_www_prefix_ok(input, index) {
                return None;
            }
            // The host must validate with a dot required (`allow_short = false`).
            check_domain(rest, false)?;
            let end = autolink_url_end(input, index, index, relaxed);
            // Reject an empty result. In strict mode also reject a result of at
            // most three bytes (the URL trimmed back to `www` or less) unless
            // the literal starts a line.
            if end <= index || (!relaxed && end <= index + 3 && !literal_starts_line(input, index))
            {
                return None;
            }
            if literal_autolink_suppressed_by_link_label(input, index, end, relaxed, gfm) {
                return None;
            }
            let mut destination = String::from("http://");
            destination.push_str(&input[index..end]);
            return Some((end, destination));
        }

        // Extended-protocol and bare emails are delegated wholesale.
        if let Some(email) = parse_literal_email(input, index) {
            return Some(email);
        }
    }

    if relaxed {
        // cmark-gfm "relaxed" URL autolinks: a bare `scheme://…` for any scheme
        // (`smb://`, `irc://`, `rdar://`, `we://`, `nex://[…]`, …) or a
        // scheme-less leading `://…` (`://-`). Requires the same preceding-char
        // guard as the http literal (not an ASCII letter). No host/domain
        // validation (cmark-gfm is permissive here — `smb:///path` and `://-`
        // both linkify). The extent is balanced.
        if literal_scheme_prefix_ok(input, index) {
            if let Some(after_slashes) = relaxed_scheme_after_slashes(rest) {
                let body_start = index + after_slashes;
                let next = input[body_start..].chars().next();
                // Only the scheme-less form (`after_slashes == 3`, i.e. a bare
                // `://`) must be followed by a non-whitespace char; a named
                // scheme is not subject to this check.
                if next.is_none_or(|char| char.is_whitespace()) && after_slashes == 3 {
                    return None;
                }
                let end = autolink_url_end(input, body_start, body_start, true);
                if end > index {
                    if literal_autolink_suppressed_by_link_label(input, index, end, relaxed, gfm) {
                        return None;
                    }
                    return Some((end, input[index..end].into()));
                }
            }
        }
    }

    None
}
7179
// Returns the byte offset (within `rest`) just past a relaxed `scheme://` (any
// ASCII-alpha-then-`[alnum+.-]` scheme) or scheme-less `://` prefix, if `rest`
// starts with one. No scheme length cap — cmark-gfm's relaxed autolink is
// permissive. Returns `None` for a bare `scheme:` without `//` (that is the
// email/angle-autolink path's job).
fn relaxed_scheme_after_slashes(rest: &str) -> Option<usize> {
    const SEPARATOR: &[u8] = b"://";

    let bytes = rest.as_bytes();
    if bytes.starts_with(SEPARATOR) {
        return Some(SEPARATOR.len());
    }
    if !bytes.first()?.is_ascii_alphabetic() {
        return None;
    }
    // One leading letter plus however many scheme bytes follow it. Whatever
    // stops the run must be the start of `://`, otherwise there is no match.
    let scheme_len = 1 + bytes[1..]
        .iter()
        .take_while(|byte| byte.is_ascii_alphanumeric() || matches!(**byte, b'+' | b'.' | b'-'))
        .count();
    bytes[scheme_len..]
        .starts_with(SEPARATOR)
        .then_some(scheme_len + SEPARATOR.len())
}
7208
// The char immediately before a `http(s)://` literal must not be an ASCII
// letter; start of input always qualifies. An escaped `<` (`\<http://…`) is
// just literal text before the URL, so the literal still forms (the `<` is not
// treated as an angle-autolink opener).
fn literal_scheme_prefix_ok(input: &str, index: usize) -> bool {
    match input[..index].chars().next_back() {
        Some(previous) => !previous.is_ascii_alphabetic(),
        None => true,
    }
}
7221
// The char before a `www.` literal must be one of the accepted ASCII
// delimiters (`*`, `_`, `~`, `(`, `[`, `]`) or ordinary Markdown layout
// whitespace (space, tab, LF, CR); start of input always qualifies. Unicode
// whitespace is not a start delimiter for this branch.
fn literal_www_prefix_ok(input: &str, index: usize) -> bool {
    match input[..index].chars().next_back() {
        None => true,
        Some(previous) => matches!(
            previous,
            '*' | '_' | '~' | '(' | '[' | ']' | ' ' | '\t' | '\n' | '\r'
        ),
    }
}
7237
// True when `index` is the start of `input` or the byte immediately before it
// is a line feed or carriage return.
fn literal_starts_line(input: &str, index: usize) -> bool {
    match index.checked_sub(1) {
        None => true,
        Some(previous) => matches!(input.as_bytes().get(previous), Some(b'\n' | b'\r')),
    }
}
7245
7246fn literal_autolink_suppressed_by_link_label(
7247    input: &str,
7248    index: usize,
7249    end: usize,
7250    relaxed: bool,
7251    gfm_autolink_literal: bool,
7252) -> bool {
7253    if !has_unclosed_link_label_opener(input, index) {
7254        return false;
7255    }
7256    if input[end..].starts_with("](") && !link_resource_tail_has_close(input, end + 2) {
7257        return true;
7258    }
7259    !relaxed && !gfm_autolink_literal && input.as_bytes().get(end).is_some_and(|byte| *byte == b']')
7260}
7261
7262fn has_unclosed_link_label_opener(input: &str, index: usize) -> bool {
7263    let line_start = input[..index]
7264        .rfind(['\n', '\r'])
7265        .map_or(0, |offset| offset + 1);
7266    let mut depth = 0usize;
7267    let mut cursor = line_start;
7268    while cursor < index {
7269        let Some((next, char)) = next_char(input, cursor) else {
7270            break;
7271        };
7272        match char {
7273            '\\' => {
7274                cursor = next_char(input, next)
7275                    .map(|(after_escape, _)| after_escape)
7276                    .unwrap_or(next);
7277                continue;
7278            }
7279            '[' => depth += 1,
7280            ']' => {
7281                depth = depth.saturating_sub(1);
7282            }
7283            _ => {}
7284        }
7285        cursor = next;
7286    }
7287    depth > 0
7288}
7289
7290fn link_resource_tail_has_close(input: &str, start: usize) -> bool {
7291    let mut cursor = start;
7292    while cursor < input.len() {
7293        let Some((next, char)) = next_char(input, cursor) else {
7294            break;
7295        };
7296        match char {
7297            '\\' => {
7298                cursor = next_char(input, next)
7299                    .map(|(after_escape, _)| after_escape)
7300                    .unwrap_or(next);
7301                continue;
7302            }
7303            '\n' | '\r' => return false,
7304            ')' => return true,
7305            _ => {}
7306        }
7307        cursor = next;
7308    }
7309    false
7310}
7311
7312fn http_literal_host_ok(host: &str) -> bool {
7313    if host.starts_with('[') {
7314        return bracketed_ipv6_host_end(host).is_some();
7315    }
7316    match host.chars().next() {
7317        Some(char) if char.is_ascii() && char.is_ascii_alphanumeric() => {
7318            check_domain(host, true).is_some()
7319        }
7320        Some(char) if !char.is_ascii() && is_valid_hostchar(char) => {
7321            check_domain(host, true).is_some()
7322        }
7323        _ => false,
7324    }
7325}
7326
// Returns the offset just past the first `]` in `host`, provided that `]` sits
// at byte offset 2 or later (so a host opening with `[` has at least one byte
// between the brackets). The opening `[` itself is not checked here.
fn bracketed_ipv6_host_end(host: &str) -> Option<usize> {
    host.find(']')
        .filter(|&close| close > 1)
        .map(|close| close + 1)
}
7331
7332// Port of cmark-gfm `is_valid_hostchar`: a host char is valid when it is not a
7333// Unicode space and not a Unicode punctuation character.
7334fn is_valid_hostchar(char: char) -> bool {
7335    !char.is_whitespace() && !crate::unicode_punctuation::is_unicode_punctuation(char)
7336}
7337
// Port of cmark-gfm `check_domain`. Scans the leading host of `data` (up to the
// first non-host char) and returns its byte length, or `None` when invalid.
// Rejects a `_` in either of the last two `.`-separated host segments (unless
// the host has >10 segments — a DoS guard). When `allow_short` is false a dot
// is required (the `www.` rule). The URL extent past the host is determined by
// `autolink_url_end`, so the precise length here only gates validity.
//
// cmark walks bytes with `is_valid_hostchar` decoding each char; this walks
// chars directly (UTF-8 safe) over the host prefix, which yields the same
// dot/underscore-segment verdict. A `\` escapes the following char.
//
// Parameters:
//   * `data` — text beginning at the host; it may continue past the host.
//   * `allow_short` — when true, a host without any counted dot is accepted.
fn check_domain(data: &str, allow_short: bool) -> Option<usize> {
    // Number of counted dots (segment separators).
    let mut np = 0usize;
    // Counted underscores in the segment before the most recent dot.
    let mut uscore1 = 0usize;
    // Counted underscores in the segment currently being scanned.
    let mut uscore2 = 0usize;
    // Byte length of the host prefix accepted so far.
    let mut host_len = 0usize;

    let mut chars = data.char_indices().peekable();
    while let Some((offset, char)) = chars.next() {
        // cmark's accounting loop runs `for (i = 1; i < size - 1; i++)`: it
        // never inspects the first char (offset 0) nor the final char of the
        // chunk. We replicate that — a trailing `_` (e.g. `http://a_`) is not
        // counted, so the link still forms.
        // "Final" here means the last char of `data` as a whole (nothing left
        // to peek), not the last char of the host.
        let account = offset != 0 && chars.peek().is_some();
        match char {
            '\\' => {
                // Escape: consume the next char as a literal host char.
                // The escaped char is accepted without any validity check and
                // never takes part in dot/underscore accounting.
                host_len = offset + char.len_utf8();
                if let Some((next_off, next)) = chars.next() {
                    host_len = next_off + next.len_utf8();
                }
            }
            '_' if account => {
                uscore2 += 1;
                host_len = offset + char.len_utf8();
            }
            '.' if account => {
                // A counted dot starts a new segment: the current underscore
                // tally becomes the "previous segment" tally.
                uscore1 = uscore2;
                uscore2 = 0;
                np += 1;
                host_len = offset + char.len_utf8();
            }
            // `-` always, plus `_`/`.` in the uncounted first/last positions:
            // accepted as host chars without going through `is_valid_hostchar`.
            '_' | '.' | '-' => {
                host_len = offset + char.len_utf8();
            }
            _ => {
                // The first char rejected by `is_valid_hostchar` ends the host.
                if !is_valid_hostchar(char) {
                    break;
                }
                host_len = offset + char.len_utf8();
            }
        }
    }

    // An underscore in either of the last two segments invalidates the host,
    // unless more than ten dots were counted.
    if (uscore1 > 0 || uscore2 > 0) && np <= 10 {
        return None;
    }

    if allow_short || np > 0 {
        Some(host_len)
    } else {
        None
    }
}
7401
// Forward scan from `start` for the URL extent, followed by the shared
// trailing trim. The scan stops (excluding the stopping char) at:
//   * whitespace, `<`, or a char matched by `is_autolink_terminating_control`;
//   * in balanced mode, a `]` or `}` that has no matching opener inside the URL;
//   * in strict mode, a `]` seen while no `[` has appeared earlier in the URL
//     and the scan is not between backticks;
//   * a `\` followed by ASCII punctuation other than `.`.
// CommonMark allows `>` and `[` inside (the renderer percent-encodes them).
// `trim_from` is where the trailing trim may reach (the URL start).
//
// Returns the byte offset just past the URL after `autolink_delim` has trimmed
// trailing delimiters.
fn autolink_url_end(input: &str, start: usize, trim_from: usize, balanced: bool) -> usize {
    let bytes = input.as_bytes();
    let mut end = start;
    // Relaxed (cmark-gfm) URL extents balance `[`/`]` and `{`/`}` so an IPv6
    // host `nex://[fe80…]/z` and a balanced `[abc]`/`{abc}` run stay inside the
    // URL while an unbalanced trailing `]`/`}` ends it. Strict (GFM literal)
    // extents do no balancing: a `]` ends the URL only while no `[` has been
    // seen in it and the scan is outside backticks; once a `[` has appeared,
    // every later `]` is kept. The two oracle shapes differ on purpose
    // (`autolink_brackets_unbalanced` keeps both `]`;
    // `autolink_relaxed_links_brackets_balanced` keeps one).
    let mut bracket_depth = 0i32;
    let mut curly_depth = 0i32;
    // Strict mode only: set once any `[` has been seen in the URL.
    let mut strict_has_open_bracket = false;
    // Strict mode only: toggled by every backtick in the URL.
    let mut strict_inside_backticks = false;
    for (offset, char) in input[start..].char_indices() {
        if char.is_whitespace() || char == '<' || is_autolink_terminating_control(char) {
            break;
        }
        if balanced {
            match char {
                '[' => bracket_depth += 1,
                ']' => {
                    if bracket_depth > 0 {
                        bracket_depth -= 1;
                    } else {
                        break;
                    }
                }
                '{' => curly_depth += 1,
                '}' => {
                    if curly_depth > 0 {
                        curly_depth -= 1;
                    } else {
                        break;
                    }
                }
                _ => {}
            }
        } else {
            match char {
                '[' => strict_has_open_bracket = true,
                '`' => strict_inside_backticks = !strict_inside_backticks,
                ']' if !strict_has_open_bracket && !strict_inside_backticks => break,
                _ => {}
            }
        }
        // Round-trip guard: when a literal autolink ends (a trailing entity
        // run, punctuation trim, unbalanced `)`, or the `]`/`<` hard boundary),
        // the text that follows often begins with a char the serializer escapes
        // with a backslash (`\&`, `\[`, `\]`, `\<`, `\>`, `\*`, `\_`, …). The
        // URL scan must stop at such a `\<punct>` so the escape is not re-merged
        // into the destination. A `\` before `.` (or any non-punctuation) is a
        // genuine literal backslash inside the URL (e.g. `www.x.com/a\.`), which
        // the serializer never produces, so it stays part of the URL.
        if char == '\\' {
            // `\` is one byte, so the next byte sits at `start + offset + 1`.
            if let Some(&next) = bytes.get(start + offset + 1) {
                let next_is_escapable_punct = next.is_ascii_punctuation() && next != b'.';
                if next_is_escapable_punct {
                    break;
                }
            }
        }
        // The char is part of the URL: extend the extent past it.
        end = start + offset + char.len_utf8();
    }
    autolink_delim(input, trim_from, end)
}
7472
// True for the code points U+2066 through U+2069, which end a literal
// autolink's URL scan.
fn is_autolink_terminating_control(char: char) -> bool {
    ('\u{2066}'..='\u{2069}').contains(&char)
}
7476
7477// Port of cmark-gfm `autolink_delim`: trim trailing delimiters from the end of
7478// the URL. A trailing `) ? ! . , : * _ ~ ' "` is trimmed; `)` only when there
7479// are more `)` than `(` in the link; a trailing `&…;` entity run is excluded
7480// whole; a lone trailing `;` is trimmed.
7481fn autolink_delim(input: &str, start: usize, mut end: usize) -> usize {
7482    let bytes = input.as_bytes();
7483    let mut opening = 0usize;
7484    let mut closing = 0usize;
7485    for &byte in &bytes[start..end] {
7486        match byte {
7487            b'(' => opening += 1,
7488            b')' => closing += 1,
7489            _ => {}
7490        }
7491    }
7492
7493    while end > start {
7494        match bytes[end - 1] {
7495            b')' => {
7496                if closing <= opening {
7497                    break;
7498                }
7499                closing -= 1;
7500                end -= 1;
7501            }
7502            b'?' | b'!' | b'.' | b',' | b':' | b'*' | b'_' | b'~' | b'\'' | b'"' => {
7503                end -= 1;
7504            }
7505            b';' => {
7506                // A trailing hex numeric character reference `&#x…;` is excluded
7507                // whole. This is the round-trip dual of the serializer, which
7508                // encodes a text char that would otherwise merge into the URL as
7509                // a hex entity; no autolink-oracle URL ends in `&#x…;`, so this
7510                // is conformance-safe (decimal `&#…;` is left intact to match
7511                // the oracle, which keeps `www.a&#35` in the URL).
7512                if let Some(amp) = trailing_hex_entity_run_start(bytes, start, end) {
7513                    end = amp;
7514                } else {
7515                    // Walk back over alphanumerics; if they reach a `&`, exclude
7516                    // the whole `&…;` entity run, otherwise trim just the `;`.
7517                    let mut new_end = end - 1;
7518                    while new_end > start && bytes[new_end - 1].is_ascii_alphanumeric() {
7519                        new_end -= 1;
7520                    }
7521                    if new_end > start && new_end < end - 1 && bytes[new_end - 1] == b'&' {
7522                        end = new_end - 1;
7523                    } else {
7524                        end -= 1;
7525                    }
7526                }
7527            }
7528            _ => break,
7529        }
7530    }
7531    end
7532}
7533
// Looks for a hex numeric character reference `&#x[hex]+;` (the `x` may be
// upper or lower case) that ends exactly at `end` and lies entirely at or after
// `start`. Returns the offset of its `&`, or `None` when there is no such
// reference. `autolink_delim` uses this to drop the serializer's round-trip
// boundary marker (`&#xNN;`). Decimal `&#…;` deliberately does not match.
fn trailing_hex_entity_run_start(bytes: &[u8], start: usize, end: usize) -> Option<usize> {
    if end <= start || bytes[end - 1] != b';' {
        return None;
    }
    let semicolon = end - 1;
    // Number of hex digits directly in front of the `;`.
    let digit_count = bytes[start..semicolon]
        .iter()
        .rev()
        .take_while(|byte| byte.is_ascii_hexdigit())
        .count();
    let first_digit = semicolon - digit_count;
    // Need at least one digit, plus room for the three-byte `&#x` introducer
    // without reaching back past `start`.
    if digit_count == 0 || first_digit < start + 3 {
        return None;
    }
    let ampersand = first_digit - 3;
    match &bytes[ampersand..first_digit] {
        [b'&', b'#', b'x' | b'X'] => Some(ampersand),
        _ => None,
    }
}
7558
7559// GFM bare-email literal (and the extended `mailto:`/`xmpp:` protocol forms).
7560// `index` must be the link start: cmark anchors the email at the left edge
7561// found by rewinding from `@` over `[A-Za-z0-9._+-]` (or a `mailto:`/`xmpp:`
7562// scheme), so this only succeeds when the char before `index` is not part of
7563// that left extent.
7564fn parse_literal_email(input: &str, index: usize) -> Option<(usize, String)> {
7565    let rest = &input[index..];
7566    let at = rest.find('@')?;
7567    if at == 0 {
7568        return None;
7569    }
7570    let local = &rest[..at];
7571
7572    // Determine whether this `@` is preceded by an extended protocol scheme
7573    // (`mailto:` / `xmpp:`), which both relaxes the href synthesis and (xmpp)
7574    // allows `/` in the domain.
7575    let (auto_mailto, is_xmpp) = classify_email_local(local);
7576
7577    // Left-boundary guard (autolink-1): the char before `index` must not be a
7578    // local-part continuation char, otherwise the true link starts earlier and
7579    // this position is interior. After a recognized scheme, the scheme's own
7580    // preceding-char rule is what matters.
7581    if !email_left_boundary_ok(input, index, auto_mailto) {
7582        return None;
7583    }
7584
7585    if !email_local_is_valid(local, auto_mailto) {
7586        return None;
7587    }
7588
7589    let domain_start = index + at + 1;
7590    let domain_end = literal_email_domain_end(input, domain_start, is_xmpp)?;
7591    let trimmed = autolink_delim(input, domain_start, domain_end);
7592    if trimmed <= domain_start {
7593        return None;
7594    }
7595
7596    let domain = &input[domain_start..trimmed];
7597    if !is_gfm_email_domain(domain, is_xmpp) {
7598        return None;
7599    }
7600
7601    let mut destination = String::new();
7602    if auto_mailto {
7603        destination.push_str("mailto:");
7604    }
7605    destination.push_str(&input[index..trimmed]);
7606    Some((trimmed, destination))
7607}
7608
7609// Classify the local part for the extended-protocol forms. Returns
7610// `(auto_mailto, is_xmpp)`: `mailto:user` → (false, false); `xmpp:user` →
7611// (false, true); a bare local part → (true, false). The scheme match is
7612// case-insensitive.
7613fn classify_email_local(local: &str) -> (bool, bool) {
7614    if let Some(rest) = strip_ci_prefix(local, "mailto:") {
7615        if !rest.is_empty() {
7616            return (false, false);
7617        }
7618    }
7619    if let Some(rest) = strip_ci_prefix(local, "xmpp:") {
7620        if !rest.is_empty() {
7621            return (false, true);
7622        }
7623    }
7624    (true, false)
7625}
7626
// ASCII-case-insensitive counterpart of `str::strip_prefix`: when `input`
// begins with `prefix` (ignoring ASCII case) returns what follows it,
// otherwise `None`.
fn strip_ci_prefix<'a>(input: &'a str, prefix: &str) -> Option<&'a str> {
    let split = prefix.len();
    // `get` yields `None` when `input` is shorter than the prefix.
    let head = input.as_bytes().get(..split)?;
    head.eq_ignore_ascii_case(prefix.as_bytes())
        .then(|| &input[split..])
}
7636
7637// The left-boundary check for an email literal. The link is anchored at its
7638// true left edge: the preceding char must not be an ASCII alphanumeric (which
7639// would extend the local part leftward). For the bare form, a preceding `/` is
7640// also rejected (`/a@b.c` is not linked), while the extended
7641// `mailto:`/`xmpp:` form permits `/` before the scheme (so
7642// `…/mailto:beedrill@…` links).
7643fn email_left_boundary_ok(input: &str, index: usize, auto_mailto: bool) -> bool {
7644    if index == 0 {
7645        return true;
7646    }
7647    let Some(previous) = input[..index].chars().next_back() else {
7648        return true;
7649    };
7650    if previous.is_ascii_alphanumeric() {
7651        if auto_mailto
7652            && input[index..].starts_with('+')
7653            && prefix_ends_with_gfm_email(input, index)
7654        {
7655            return true;
7656        }
7657        return false;
7658    }
7659    if auto_mailto && previous == '/' {
7660        return false;
7661    }
7662    true
7663}
7664
7665fn prefix_ends_with_gfm_email(input: &str, end: usize) -> bool {
7666    let start = input[..end]
7667        .rfind(char::is_whitespace)
7668        .map_or(0, |offset| offset + 1);
7669    let candidate = &input[start..end];
7670    let Some(at) = candidate.rfind('@') else {
7671        return false;
7672    };
7673    email_local_is_valid(&candidate[..at], true) && is_gfm_email_domain(&candidate[at + 1..], false)
7674}
7675
7676// Validate the email local part. For the bare form, every char must be a GFM
7677// email atext byte (`[A-Za-z0-9.+_-]` plus the dot-separated structure). For
7678// the extended-protocol forms, the part after the scheme is validated.
7679fn email_local_is_valid(local: &str, auto_mailto: bool) -> bool {
7680    let body = if auto_mailto {
7681        local
7682    } else if let Some(rest) = strip_ci_prefix(local, "mailto:") {
7683        rest
7684    } else if let Some(rest) = strip_ci_prefix(local, "xmpp:") {
7685        rest
7686    } else {
7687        local
7688    };
7689    !body.is_empty() && body.bytes().all(is_gfm_email_local_byte)
7690}
7691
// Membership test for the GFM email local-part alphabet (autolink-1),
// `[A-Za-z0-9.+_-]`: the class cmark rewinds over, and narrower than RFC
// atext.
fn is_gfm_email_local_byte(byte: u8) -> bool {
    matches!(
        byte,
        b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'.' | b'+' | b'_' | b'-'
    )
}
7697
7698fn is_email_local_part(input: &str) -> bool {
7699    !input.is_empty()
7700        && input
7701            .split('.')
7702            .all(|segment| !segment.is_empty() && segment.bytes().all(is_email_atext))
7703}
7704
// True for bytes allowed in an email "atext" run: ASCII letters and digits
// plus the symbols ! # $ % & ' * + / = ? ^ _ ` { | } ~ -
fn is_email_atext(byte: u8) -> bool {
    const ATEXT_SYMBOLS: &[u8] = b"!#$%&'*+/=?^_`{|}~-";
    byte.is_ascii_alphanumeric() || ATEXT_SYMBOLS.contains(&byte)
}
7729
// Port of cmark-gfm's email-domain scan (`postprocess_text`). Walks forward
// from `index` while the bytes can belong to an email domain:
//   * ASCII alphanumerics, `-` and `_` always continue the scan;
//   * `.` continues it (and is counted) only when an ASCII alphanumeric
//     follows;
//   * `/` continues it only in the `xmpp:` form (path).
// Returns the offset where the scan stopped, before any trailing trim, provided
// that at least one byte was consumed, at least one dot was counted, and the
// final consumed byte is an ASCII letter or a dot. Otherwise returns `None`.
fn literal_email_domain_end(input: &str, index: usize, is_xmpp: bool) -> Option<usize> {
    let bytes = input.as_bytes();
    let mut counted_dots = 0usize;
    let mut end = index;
    while let Some(&byte) = bytes.get(end) {
        match byte {
            b'.' => {
                let label_follows = bytes
                    .get(end + 1)
                    .is_some_and(u8::is_ascii_alphanumeric);
                if !label_follows {
                    break;
                }
                counted_dots += 1;
            }
            b'-' | b'_' => {}
            b'/' if is_xmpp => {}
            _ if byte.is_ascii_alphanumeric() => {}
            _ => break,
        }
        end += 1;
    }

    if end == index || counted_dots == 0 {
        return None;
    }
    let last = bytes[end - 1];
    (last.is_ascii_alphabetic() || last == b'.').then_some(end)
}
7766
// Structural check of an email domain after the shared trailing trim. For the
// `xmpp:` form only the host, i.e. the text before the first `/`, is examined;
// otherwise the whole input is the host. The host must:
//   * contain at least one `.`;
//   * not end in `-` or `_` (autolink-7);
//   * split on `.` into non-empty labels made only of ASCII alphanumerics,
//     `-` and `_`.
fn is_gfm_email_domain(input: &str, is_xmpp: bool) -> bool {
    // A `/` path is only legal in the `xmpp:` form, so it is cut off only there.
    let host = match input.find('/') {
        Some(slash) if is_xmpp => &input[..slash],
        _ => input,
    };
    // An empty host has no dot, so it is rejected here as well.
    let ends_with_joiner = matches!(host.as_bytes().last(), Some(b'-' | b'_'));
    if !host.contains('.') || ends_with_joiner {
        return false;
    }
    host.split('.').all(|label| {
        !label.is_empty()
            && label
                .bytes()
                .all(|byte| matches!(byte, b'-' | b'_') || byte.is_ascii_alphanumeric())
    })
}
7798
// Validates a dot-separated domain. Every label must be 1..=63 bytes long,
// start and end with an ASCII alphanumeric, and contain only ASCII
// alphanumerics or `-`. In addition the domain must have at least
// `min_labels` labels.
fn is_email_domain(input: &str, min_labels: usize) -> bool {
    fn is_valid_label(label: &[u8]) -> bool {
        let edges_ok = matches!(
            (label.first(), label.last()),
            (Some(first), Some(last))
                if first.is_ascii_alphanumeric() && last.is_ascii_alphanumeric()
        );
        edges_ok
            && label.len() <= 63
            && label
                .iter()
                .all(|byte| *byte == b'-' || byte.is_ascii_alphanumeric())
    }

    let mut seen = 0usize;
    let all_valid = input.split('.').all(|label| {
        seen += 1;
        is_valid_label(label.as_bytes())
    });
    all_valid && seen >= min_labels
}
7821
7822fn is_footnote_label(label: &str) -> bool {
7823    !label.is_empty()
7824        && reference_label_is_within_limit(label)
7825        && !label.chars().any(char::is_whitespace)
7826}
7827
7828fn find_footnote_definition_label_end(input: &str) -> Option<usize> {
7829    let close = find_footnote_reference_label_end(input, 2)?;
7830    if input.as_bytes().get(close + 1) == Some(&b':') {
7831        Some(close)
7832    } else {
7833        None
7834    }
7835}
7836
7837fn find_footnote_reference_label_end(input: &str, mut cursor: usize) -> Option<usize> {
7838    while cursor < input.len() {
7839        let (next, char) = next_char(input, cursor)?;
7840        if char == ']' && !is_escaped_at(input, cursor) {
7841            return Some(cursor);
7842        }
7843        cursor = next;
7844    }
7845    None
7846}
7847
7848fn find_inline_footnote_end(input: &str, mut cursor: usize) -> Option<usize> {
7849    let mut depth = 0usize;
7850    while cursor < input.len() {
7851        let (next, char) = next_char(input, cursor)?;
7852        if !is_escaped_at(input, cursor) {
7853            match char {
7854                '[' => depth += 1,
7855                ']' if depth == 0 => return Some(cursor),
7856                ']' => depth = depth.saturating_sub(1),
7857                _ => {}
7858            }
7859        }
7860        cursor = next;
7861    }
7862    None
7863}