Skip to main content

markdown_syntax/
validate.rs

1//! AST validation: [`Document::validate`] walks the tree and reports each
2//! invalid or unsupported node shape as a [`Diagnostic`]. Serialization and HTML
3//! rendering run this first and refuse an invalid document.
4
5use alloc::vec::Vec;
6
7use crate::{
8    ast::{
9        Autolink, AutolinkKind, Block, CodeInline, ContainerDirective, DirectiveAttribute,
10        Document, Escape, Heading, Inline, LeafDirective, List, MathInlineKind, Table,
11        TextDirective,
12    },
13    diagnostic::Diagnostic,
14    span::Span,
15};
16
17impl Document {
18    /// Validate this document's AST shape, returning a diagnostic for each
19    /// invalid or unsupported node (empty when the document is well-formed).
20    pub fn validate(&self) -> Vec<Diagnostic> {
21        validate_document(self)
22    }
23}
24
25pub(crate) fn validate_document(document: &Document) -> Vec<Diagnostic> {
26    let mut diagnostics = Vec::new();
27    for block in &document.children {
28        validate_block(block, &mut diagnostics);
29    }
30    diagnostics
31}
32
33fn validate_block(block: &Block, diagnostics: &mut Vec<Diagnostic>) {
34    match block {
35        Block::Paragraph(paragraph) => validate_inlines(&paragraph.children, diagnostics),
36        Block::Heading(heading) => validate_heading(heading, diagnostics),
37        Block::BlockQuote(block_quote) => {
38            for child in &block_quote.children {
39                validate_block(child, diagnostics);
40            }
41        }
42        Block::Alert(alert) => {
43            for child in &alert.children {
44                validate_block(child, diagnostics);
45            }
46        }
47        Block::List(list) => {
48            validate_list_start(list, diagnostics);
49            for item in &list.children {
50                for child in &item.children {
51                    validate_block(child, diagnostics);
52                }
53            }
54        }
55        Block::DescriptionList(list) => {
56            for item in &list.children {
57                validate_inlines(&item.term, diagnostics);
58                if item.details.is_empty() {
59                    diagnostics.push(Diagnostic::invalid(
60                        item.meta.span,
61                        "description item must contain at least one details block",
62                    ));
63                }
64                for details in &item.details {
65                    for child in &details.children {
66                        validate_block(child, diagnostics);
67                    }
68                }
69            }
70        }
71        Block::Table(table) => validate_table(table, diagnostics),
72        Block::FootnoteDefinition(definition) => {
73            if definition.identifier.is_empty() {
74                diagnostics.push(Diagnostic::invalid(
75                    definition.meta.span,
76                    "footnote definition identifier cannot be empty",
77                ));
78            }
79            for child in &definition.children {
80                validate_block(child, diagnostics);
81            }
82        }
83        Block::Definition(definition) => {
84            if definition.identifier.trim().is_empty() {
85                diagnostics.push(Diagnostic::invalid(
86                    definition.meta.span,
87                    "definition identifier cannot be empty",
88                ));
89            }
90        }
91        Block::LeafDirective(directive) => validate_leaf_directive(directive, diagnostics),
92        Block::ContainerDirective(directive) => {
93            validate_container_directive(directive, diagnostics)
94        }
95        Block::ThematicBreak(_)
96        | Block::CodeBlock(_)
97        | Block::HtmlBlock(_)
98        | Block::MathBlock(_)
99        | Block::Frontmatter(_)
100        | Block::MdxEsm(_)
101        | Block::MdxExpression(_)
102        | Block::MdxJsx(_) => {}
103    }
104}
105
106fn validate_heading(heading: &Heading, diagnostics: &mut Vec<Diagnostic>) {
107    if heading.depth == 0 || heading.depth > 6 {
108        diagnostics.push(Diagnostic::invalid(
109            heading.meta.span,
110            "heading depth must be in the range 1..=6",
111        ));
112    }
113    validate_inlines(&heading.children, diagnostics);
114}
115
116fn validate_table(table: &Table, diagnostics: &mut Vec<Diagnostic>) {
117    if table.rows.is_empty() {
118        diagnostics.push(Diagnostic::invalid(
119            table.meta.span,
120            "table must contain at least a header row",
121        ));
122        return;
123    }
124
125    let width = table.rows[0].cells.len();
126    if width == 0 {
127        diagnostics.push(Diagnostic::invalid(
128            table.meta.span,
129            "table header row must contain at least one cell",
130        ));
131    }
132
133    if table.alignments.len() != width {
134        diagnostics.push(Diagnostic::invalid(
135            table.meta.span,
136            "table alignment count must match header width",
137        ));
138    }
139
140    for row in &table.rows {
141        if row.cells.len() != width {
142            diagnostics.push(Diagnostic::invalid(
143                row.meta.span,
144                "table row width must match header width",
145            ));
146        }
147        for cell in &row.cells {
148            validate_inlines(&cell.children, diagnostics);
149        }
150    }
151}
152
153fn validate_leaf_directive(directive: &LeafDirective, diagnostics: &mut Vec<Diagnostic>) {
154    validate_directive_name(directive.meta.span, &directive.name, diagnostics);
155    validate_directive_attributes(&directive.attributes, diagnostics);
156    validate_inlines(&directive.label, diagnostics);
157}
158
159fn validate_container_directive(directive: &ContainerDirective, diagnostics: &mut Vec<Diagnostic>) {
160    validate_directive_name(directive.meta.span, &directive.name, diagnostics);
161    validate_directive_attributes(&directive.attributes, diagnostics);
162    validate_inlines(&directive.label, diagnostics);
163    for child in &directive.children {
164        validate_block(child, diagnostics);
165    }
166}
167
168fn validate_inlines(inlines: &[Inline], diagnostics: &mut Vec<Diagnostic>) {
169    if let Some(Inline::LineBreak(node)) = inlines.last() {
170        diagnostics.push(Diagnostic::invalid(
171            node.meta.span,
172            "hard line break cannot be the final inline of its container",
173        ));
174    }
175    for inline in inlines {
176        match inline {
177            Inline::Emphasis(node) => {
178                validate_emphasis_container(&node.children, node.meta.span, diagnostics)
179            }
180            Inline::Strong(node) => {
181                validate_emphasis_container(&node.children, node.meta.span, diagnostics)
182            }
183            Inline::Underline(node) => {
184                validate_emphasis_container(&node.children, node.meta.span, diagnostics)
185            }
186            Inline::Delete(node) => {
187                validate_emphasis_container(&node.children, node.meta.span, diagnostics)
188            }
189            Inline::Insert(node) => {
190                validate_emphasis_container(&node.children, node.meta.span, diagnostics)
191            }
192            Inline::Mark(node) => {
193                validate_emphasis_container(&node.children, node.meta.span, diagnostics)
194            }
195            Inline::Subscript(node) => {
196                validate_emphasis_container(&node.children, node.meta.span, diagnostics)
197            }
198            Inline::Superscript(node) => {
199                validate_emphasis_container(&node.children, node.meta.span, diagnostics)
200            }
201            Inline::Spoiler(node) => {
202                validate_emphasis_container(&node.children, node.meta.span, diagnostics)
203            }
204            Inline::Shortcode(node) => {
205                if node.name.is_empty() {
206                    diagnostics.push(Diagnostic::invalid(
207                        node.meta.span,
208                        "shortcode name cannot be empty",
209                    ));
210                }
211            }
212            Inline::Link(node) => validate_inlines(&node.children, diagnostics),
213            Inline::Image(node) => validate_inlines(&node.alt, diagnostics),
214            Inline::LinkReference(node) => {
215                if node.identifier.is_empty() {
216                    diagnostics.push(Diagnostic::invalid(
217                        node.meta.span,
218                        "link reference identifier cannot be empty",
219                    ));
220                }
221                validate_inlines(&node.children, diagnostics);
222            }
223            Inline::ImageReference(node) => {
224                if node.identifier.is_empty() {
225                    diagnostics.push(Diagnostic::invalid(
226                        node.meta.span,
227                        "image reference identifier cannot be empty",
228                    ));
229                }
230                validate_inlines(&node.alt, diagnostics);
231            }
232            Inline::Escape(node) => validate_escape(node, diagnostics),
233            Inline::CharacterReference(node) => {
234                if node.reference.is_empty() {
235                    diagnostics.push(Diagnostic::invalid(
236                        node.meta.span,
237                        "character reference source cannot be empty",
238                    ));
239                }
240                if node.value.is_empty() {
241                    diagnostics.push(Diagnostic::invalid(
242                        node.meta.span,
243                        "character reference value cannot be empty",
244                    ));
245                }
246            }
247            Inline::TextDirective(node) => validate_text_directive(node, diagnostics),
248            Inline::FootnoteReference(node) => {
249                if node.identifier.is_empty() {
250                    diagnostics.push(Diagnostic::invalid(
251                        node.meta.span,
252                        "footnote reference identifier cannot be empty",
253                    ));
254                }
255            }
256            Inline::InlineFootnote(node) => validate_inlines(&node.children, diagnostics),
257            Inline::WikiLink(node) => {
258                if node.target.is_empty() {
259                    diagnostics.push(Diagnostic::invalid(
260                        node.meta.span,
261                        "wikilink target cannot be empty",
262                    ));
263                }
264            }
265            Inline::Code(node) => validate_code_inline(node, diagnostics),
266            Inline::Autolink(node) => validate_autolink(node, diagnostics),
267            Inline::Math(node) => {
268                if let MathInlineKind::Dollar { dollars: 0 } = node.kind {
269                    diagnostics.push(Diagnostic::invalid(
270                        node.meta.span,
271                        "dollar-fenced inline math must have a fence length of at least 1",
272                    ));
273                }
274            }
275            Inline::Text(_)
276            | Inline::Html(_)
277            | Inline::SoftBreak(_)
278            | Inline::LineBreak(_)
279            | Inline::MdxExpression(_)
280            | Inline::MdxJsx(_) => {}
281        }
282    }
283}
284
285fn validate_emphasis_container(
286    children: &[Inline],
287    span: Option<Span>,
288    diagnostics: &mut Vec<Diagnostic>,
289) {
290    if children.is_empty() {
291        diagnostics.push(Diagnostic::invalid(
292            span,
293            "emphasis-like inline container cannot have empty children",
294        ));
295    }
296    validate_inlines(children, diagnostics);
297}
298
299fn validate_escape(escape: &Escape, diagnostics: &mut Vec<Diagnostic>) {
300    if !escape.value.is_ascii_punctuation() {
301        diagnostics.push(Diagnostic::invalid(
302            escape.meta.span,
303            "escaped value must be an ASCII punctuation character",
304        ));
305    }
306}
307
308fn validate_autolink(autolink: &Autolink, diagnostics: &mut Vec<Diagnostic>) {
309    // GFM literal autolinks carry a synthesized destination that MAY contain
310    // `>` (the renderer percent-encodes it). Only angle-bracket autolinks
311    // forbid whitespace, `<`, and `>` in the destination.
312    if matches!(autolink.kind, AutolinkKind::GfmLiteral { .. }) {
313        return;
314    }
315    if autolink
316        .destination
317        .chars()
318        .any(|char| char.is_whitespace() || char == '<' || char == '>')
319    {
320        diagnostics.push(Diagnostic::invalid(
321            autolink.meta.span,
322            "autolink destination cannot contain whitespace, `<`, or `>`",
323        ));
324    }
325}
326
327fn validate_code_inline(code: &CodeInline, diagnostics: &mut Vec<Diagnostic>) {
328    if code.fence_length == 0 {
329        return;
330    }
331    // A code span fence of length N is closed only by a backtick run of exactly
332    // length N. A run shorter or longer than the fence is inert, so only an
333    // exactly-matching interior run would close the raw passthrough early.
334    if raw_has_backtick_run(&code.raw, code.fence_length) {
335        diagnostics.push(Diagnostic::invalid(
336            code.meta.span,
337            "inline code raw passthrough contains a backtick run equal to its fence length",
338        ));
339    }
340}
341
/// Returns `true` when `input` contains a maximal run of consecutive
/// backticks that is exactly `length` bytes long.
///
/// Splitting on every non-backtick byte yields each maximal run, including
/// the empty runs before, between, and after other bytes, so a `length` of 0
/// matches whenever such an empty run exists.
fn raw_has_backtick_run(input: &str, length: usize) -> bool {
    input
        .as_bytes()
        .split(|&byte| byte != b'`')
        .any(|run| run.len() == length)
}
356
357fn validate_list_start(list: &List, diagnostics: &mut Vec<Diagnostic>) {
358    if !list.ordered {
359        return;
360    }
361    let Some(start) = list.start else {
362        return;
363    };
364    if start > 999_999_999 {
365        diagnostics.push(Diagnostic::invalid(
366            list.meta.span,
367            "ordered list start must be representable in at most 9 digits",
368        ));
369    }
370}
371
372fn validate_text_directive(directive: &TextDirective, diagnostics: &mut Vec<Diagnostic>) {
373    validate_directive_name(directive.meta.span, &directive.name, diagnostics);
374    validate_directive_attributes(&directive.attributes, diagnostics);
375    validate_inlines(&directive.label, diagnostics);
376}
377
378fn validate_directive_name(span: Option<Span>, name: &str, diagnostics: &mut Vec<Diagnostic>) {
379    if !is_directive_name(name) {
380        diagnostics.push(Diagnostic::invalid(
381            span,
382            "directive name must start with a letter and contain letters, digits, `_`, or `-`",
383        ));
384    }
385}
386
387fn validate_directive_attributes(
388    attributes: &[DirectiveAttribute],
389    diagnostics: &mut Vec<Diagnostic>,
390) {
391    for attribute in attributes {
392        if !is_attribute_name(&attribute.name) {
393            diagnostics.push(Diagnostic::invalid(
394                None,
395                "directive attribute name must start with a letter, `_`, or `-`",
396            ));
397        }
398    }
399}
400
/// Returns `true` when `name` is non-empty, begins with an ASCII letter, and
/// every following character is an ASCII letter, ASCII digit, `_`, or `-`.
pub(crate) fn is_directive_name(name: &str) -> bool {
    let mut rest = name.chars();
    match rest.next() {
        Some(head) if head.is_ascii_alphabetic() => {
            rest.all(|c| matches!(c, '_' | '-') || c.is_ascii_alphanumeric())
        }
        _ => false,
    }
}
411
/// Returns `true` when `name` is non-empty, begins with an ASCII letter, `_`,
/// or `-`, and every following character is an ASCII letter, ASCII digit,
/// `_`, `-`, or `:`.
pub(crate) fn is_attribute_name(name: &str) -> bool {
    let mut rest = name.chars();
    match rest.next() {
        Some(head) if matches!(head, '_' | '-') || head.is_ascii_alphabetic() => {
            rest.all(|c| matches!(c, '_' | '-' | ':') || c.is_ascii_alphanumeric())
        }
        _ => false,
    }
}
421}