comrak/
nodes.rs

1//! The CommonMark AST.
2
3use crate::arena_tree::Node;
4use std::cell::RefCell;
5use std::convert::TryFrom;
6
7#[cfg(feature = "shortcodes")]
8pub use crate::parser::shortcodes::NodeShortCode;
9
10pub use crate::parser::alert::{AlertType, NodeAlert};
11pub use crate::parser::math::NodeMath;
12pub use crate::parser::multiline_block_quote::NodeMultilineBlockQuote;
13
/// The core AST node enum.
///
/// Each variant is tagged in its documentation as a **block**, an **inline**, or both; see
/// [`NodeValue::block`] for the classification used by the code.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(test, derive(strum::EnumDiscriminants))]
#[cfg_attr(
    test,
    strum_discriminants(vis(pub(crate)), derive(strum::VariantArray, Hash))
)]
pub enum NodeValue {
    /// The root of every CommonMark document.  Contains **blocks**.
    Document,

    /// Non-Markdown front matter.  Treated as an opaque blob.
    FrontMatter(String),

    /// **Block**. A [block quote](https://github.github.com/gfm/#block-quotes).  Contains other
    /// **blocks**.
    ///
    /// ``` md
    /// > A block quote.
    /// ```
    BlockQuote,

    /// **Block**.  A [list](https://github.github.com/gfm/#lists).  Contains
    /// [list items](https://github.github.com/gfm/#list-items).
    ///
    /// ``` md
    /// * An unordered list
    /// * Another item
    ///
    /// 1. An ordered list
    /// 2. Another item
    /// ```
    List(NodeList),

    /// **Block**.  A [list item](https://github.github.com/gfm/#list-items).  Contains other
    /// **blocks**.
    Item(NodeList),

    /// **Block**. A description list, enabled with `ext_description_lists` option.  Contains
    /// description items.
    ///
    /// It is required to put a blank line between terms and details.
    ///
    /// ``` md
    /// Term 1
    ///
    /// : Details 1
    ///
    /// Term 2
    ///
    /// : Details 2
    /// ```
    DescriptionList,

    /// **Block**. An item of a description list.  Contains a term and one details block.
    DescriptionItem(NodeDescriptionItem),

    /// **Block**. Term of an item in a description list.
    DescriptionTerm,

    /// **Block**. Details of an item in a description list.
    DescriptionDetails,

    /// **Block**. A code block; may be [fenced](https://github.github.com/gfm/#fenced-code-blocks)
    /// or [indented](https://github.github.com/gfm/#indented-code-blocks).  Contains raw text
    /// which is not parsed as Markdown, although is HTML escaped.
    CodeBlock(NodeCodeBlock),

    /// **Block**. A [HTML block](https://github.github.com/gfm/#html-blocks).  Contains raw text
    /// which is neither parsed as Markdown nor HTML escaped.
    HtmlBlock(NodeHtmlBlock),

    /// **Block**. A [paragraph](https://github.github.com/gfm/#paragraphs).  Contains **inlines**.
    Paragraph,

    /// **Block**. A heading; may be an [ATX heading](https://github.github.com/gfm/#atx-headings)
    /// or a [setext heading](https://github.github.com/gfm/#setext-headings). Contains
    /// **inlines**.
    Heading(NodeHeading),

    /// **Block**. A [thematic break](https://github.github.com/gfm/#thematic-breaks).  Has no
    /// children.
    ThematicBreak,

    /// **Block**. A footnote definition.  The [`NodeFootnoteDefinition`] carries the footnote's
    /// name and its total number of references.  Contains other **blocks**.
    FootnoteDefinition(NodeFootnoteDefinition),

    /// **Block**. A [table](https://github.github.com/gfm/#tables-extension-) per the GFM spec.
    /// Contains table rows.
    Table(NodeTable),

    /// **Block**. A table row.  The `bool` represents whether the row is the header row or not.
    /// Contains table cells.
    TableRow(bool),

    /// **Block**.  A table cell.  Contains **inlines**.
    TableCell,

    /// **Inline**.  [Textual content](https://github.github.com/gfm/#textual-content).  All text
    /// in a document will be contained in a `Text` node.
    Text(String),

    /// **Block**. [Task list item](https://github.github.com/gfm/#task-list-items-extension-).
    /// The value is the symbol that was used in the brackets to mark a task item as checked, or
    /// `None` if the item is unchecked.
    TaskItem(Option<char>),

    /// **Inline**.  A [soft line break](https://github.github.com/gfm/#soft-line-breaks).  If
    /// the `hardbreaks` option is set in `Options` during formatting, it will be formatted
    /// as a `LineBreak`.
    SoftBreak,

    /// **Inline**.  A [hard line break](https://github.github.com/gfm/#hard-line-breaks).
    LineBreak,

    /// **Inline**.  A [code span](https://github.github.com/gfm/#code-spans).
    Code(NodeCode),

    /// **Inline**.  [Raw HTML](https://github.github.com/gfm/#raw-html) contained inline.
    HtmlInline(String),

    /// **Block/Inline**.  A Raw output node. This will be inserted verbatim into CommonMark and
    /// HTML output. It can only be created programmatically, and is never parsed from input.
    Raw(String),

    /// **Inline**.  [Emphasized](https://github.github.com/gfm/#emphasis-and-strong-emphasis)
    /// text.
    Emph,

    /// **Inline**.  [Strong](https://github.github.com/gfm/#emphasis-and-strong-emphasis) text.
    Strong,

    /// **Inline**.  [Strikethrough](https://github.github.com/gfm/#strikethrough-extension-) text
    /// per the GFM spec.
    Strikethrough,

    /// **Inline**.  Superscript.  Enabled with `ext_superscript` option.
    Superscript,

    /// **Inline**.  A [link](https://github.github.com/gfm/#links) to some URL, with possible
    /// title.
    Link(NodeLink),

    /// **Inline**.  An [image](https://github.github.com/gfm/#images).
    Image(NodeLink),

    /// **Inline**.  A footnote reference.
    FootnoteReference(NodeFootnoteReference),

    #[cfg(feature = "shortcodes")]
    /// **Inline**. An Emoji character generated from a shortcode. Enable with feature "shortcodes".
    ShortCode(NodeShortCode),

    /// **Inline**. A math span. Contains raw text which is not parsed as Markdown.
    /// Dollar math or code math
    ///
    /// Inline math $1 + 2$ and $`1 + 2`$
    ///
    /// Display math $$1 + 2$$ and
    /// $$
    /// 1 + 2
    /// $$
    ///
    Math(NodeMath),

    /// **Block**. A [multiline block quote](https://github.github.com/gfm/#block-quotes).  Spans multiple
    /// lines and contains other **blocks**.
    ///
    /// ``` md
    /// >>>
    /// A paragraph.
    ///
    /// - item one
    /// - item two
    /// >>>
    /// ```
    MultilineBlockQuote(NodeMultilineBlockQuote),

    /// **Inline**.  A character that has been [escaped](https://github.github.com/gfm/#backslash-escapes)
    ///
    /// Enabled with [`escaped_char_spans`](crate::RenderOptionsBuilder::escaped_char_spans).
    Escaped,

    /// **Inline**.  A wikilink to some URL.
    WikiLink(NodeWikiLink),

    /// **Inline**.  Underline. Enabled with `underline` option.
    Underline,

    /// **Inline**.  Subscript. Enabled with `subscript` option.
    Subscript,

    /// **Inline**.  Spoilered text.  Enabled with `spoiler` option.
    SpoileredText,

    /// **Inline**. Text surrounded by escaped markup. Enabled with `spoiler` option.
    /// The `String` is the tag to be escaped.
    EscapedTag(String),

    /// **Block**. GitHub style alert boxes which uses a modified blockquote syntax.
    /// Enabled with the `alerts` option.
    Alert(NodeAlert),
}
218
/// Alignment of a single table cell.
///
/// Stored in [`NodeTable::alignments`].
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum TableAlignment {
    /// Cell content is unaligned.
    None,

    /// Cell content is aligned left.
    Left,

    /// Cell content is centered.
    Center,

    /// Cell content is aligned right.
    Right,
}
234
235impl TableAlignment {
236    pub(crate) fn xml_name(&self) -> Option<&'static str> {
237        match *self {
238            TableAlignment::None => None,
239            TableAlignment::Left => Some("left"),
240            TableAlignment::Center => Some("center"),
241            TableAlignment::Right => Some("right"),
242        }
243    }
244}
245
/// The metadata of a table.
///
/// This is the payload of [`NodeValue::Table`].
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct NodeTable {
    /// The table alignments, as a list of [`TableAlignment`] values.
    // NOTE(review): presumably one entry per column — confirm against the table parser.
    pub alignments: Vec<TableAlignment>,

    /// Number of columns of the table
    pub num_columns: usize,

    /// Number of rows of the table
    pub num_rows: usize,

    /// Number of non-empty, non-autocompleted cells
    pub num_nonempty_cells: usize,
}
261
/// An inline [code span](https://github.github.com/gfm/#code-spans).
///
/// This is the payload of [`NodeValue::Code`].
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct NodeCode {
    /// The number of backticks
    pub num_backticks: usize,

    /// The content of the inline code span.
    /// As the contents are not interpreted as Markdown at all,
    /// they are contained within this structure,
    /// rather than inserted into a child inline of any kind.
    pub literal: String,
}
274
/// The details of a link's destination, or an image's source.
///
/// This is the payload of both [`NodeValue::Link`] and [`NodeValue::Image`].
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct NodeLink {
    /// The URL for the link destination or image source.
    pub url: String,

    /// The title for the link or image.
    ///
    /// Note this field is used for the `title` attribute by the HTML formatter even for images;
    /// `alt` text is supplied in the image inline text.
    pub title: String,
}
287
/// The details of a wikilink's destination.
///
/// This is the payload of [`NodeValue::WikiLink`].
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct NodeWikiLink {
    /// The URL for the link destination.
    pub url: String,
}
294
/// The metadata of a list; the kind of list, the delimiter used and so on.
///
/// This is the payload of both [`NodeValue::List`] and [`NodeValue::Item`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct NodeList {
    /// The kind of list (bullet (unordered) or ordered).
    pub list_type: ListType,

    /// Number of spaces before the list marker.
    pub marker_offset: usize,

    /// Number of characters between the start of the list marker and the item text (including the list marker(s)).
    pub padding: usize,

    /// For ordered lists, the ordinal the list starts at.
    pub start: usize,

    /// For ordered lists, the delimiter after each number.
    pub delimiter: ListDelimType,

    /// For bullet lists, the character used for each bullet, stored as a single byte.
    pub bullet_char: u8,

    /// Whether the list is [tight](https://github.github.com/gfm/#tight), i.e. whether the
    /// paragraphs are wrapped in `<p>` tags when formatted as HTML.
    pub tight: bool,

    /// Whether the list contains tasks (checkbox items)
    pub is_task_list: bool,
}
323
/// The metadata of a description list item.
///
/// This is the payload of [`NodeValue::DescriptionItem`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct NodeDescriptionItem {
    /// Number of spaces before the list marker.
    pub marker_offset: usize,

    /// Number of characters between the start of the list marker and the item text (including the list marker(s)).
    pub padding: usize,

    /// Whether the list is [tight](https://github.github.com/gfm/#tight), i.e. whether the
    /// paragraphs are wrapped in `<p>` tags when formatted as HTML.
    pub tight: bool,
}
337
/// The type of list.
///
/// The default is [`ListType::Bullet`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ListType {
    /// A bullet list, i.e. an unordered list.
    #[default]
    Bullet,

    /// An ordered list.
    Ordered,
}
348
/// The delimiter for ordered lists, i.e. the character which appears after each number.
///
/// The default is [`ListDelimType::Period`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ListDelimType {
    /// A period character `.`.
    #[default]
    Period,

    /// A paren character `)`.
    Paren,
}
359
360impl ListDelimType {
361    pub(crate) fn xml_name(&self) -> &'static str {
362        match *self {
363            ListDelimType::Period => "period",
364            ListDelimType::Paren => "paren",
365        }
366    }
367}
368
/// The metadata and data of a code block (fenced or indented).
///
/// This is the payload of [`NodeValue::CodeBlock`].
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct NodeCodeBlock {
    /// Whether the code block is fenced.
    pub fenced: bool,

    /// For fenced code blocks, the fence character itself (`` ` `` or `~`), stored as a single
    /// byte.
    pub fence_char: u8,

    /// For fenced code blocks, the length of the fence.
    pub fence_length: usize,

    /// For fenced code blocks, the indentation level of the code within the block.
    pub fence_offset: usize,

    /// For fenced code blocks, the [info string](https://github.github.com/gfm/#info-string) after
    /// the opening fence, if any.
    pub info: String,

    /// The literal contents of the code block.  As the contents are not interpreted as Markdown at
    /// all, they are contained within this structure, rather than inserted into a child inline of
    /// any kind.
    pub literal: String,
}
393
/// The metadata of a heading.
///
/// This is the payload of [`NodeValue::Heading`].
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub struct NodeHeading {
    /// The level of the heading; from 1 to 6 for ATX headings, 1 or 2 for setext headings.
    pub level: u8,

    /// Whether the heading is setext (if not, ATX).
    pub setext: bool,
}
403
/// The metadata of an included HTML block.
///
/// This is the payload of [`NodeValue::HtmlBlock`].
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct NodeHtmlBlock {
    /// The HTML block's type
    // NOTE(review): presumably the numbered HTML block start condition from the spec —
    // confirm against the block parser.
    pub block_type: u8,

    /// The literal contents of the HTML block.  Per NodeCodeBlock, the content is included here
    /// rather than in any inline.
    pub literal: String,
}
414
/// The metadata of a footnote definition.
///
/// This is the payload of [`NodeValue::FootnoteDefinition`].
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct NodeFootnoteDefinition {
    /// The name of the footnote.
    pub name: String,

    /// Total number of references to this footnote
    pub total_references: u32,
}
424
/// The metadata of a footnote reference.
///
/// This is the payload of [`NodeValue::FootnoteReference`].
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct NodeFootnoteReference {
    /// The name of the footnote.
    pub name: String,

    /// The index of this reference among the references to the same footnote.
    pub ref_num: u32,

    /// The index of the footnote in the document.
    pub ix: u32,
}
437
438impl NodeValue {
439    /// Indicates whether this node is a block node or inline node.
440    pub fn block(&self) -> bool {
441        matches!(
442            *self,
443            NodeValue::Document
444                | NodeValue::BlockQuote
445                | NodeValue::FootnoteDefinition(_)
446                | NodeValue::List(..)
447                | NodeValue::DescriptionList
448                | NodeValue::DescriptionItem(_)
449                | NodeValue::DescriptionTerm
450                | NodeValue::DescriptionDetails
451                | NodeValue::Item(..)
452                | NodeValue::CodeBlock(..)
453                | NodeValue::HtmlBlock(..)
454                | NodeValue::Paragraph
455                | NodeValue::Heading(..)
456                | NodeValue::ThematicBreak
457                | NodeValue::Table(..)
458                | NodeValue::TableRow(..)
459                | NodeValue::TableCell
460                | NodeValue::TaskItem(..)
461                | NodeValue::MultilineBlockQuote(_)
462                | NodeValue::Alert(_)
463        )
464    }
465
466    /// Whether the type the node is of can contain inline nodes.
467    pub fn contains_inlines(&self) -> bool {
468        matches!(
469            *self,
470            NodeValue::Paragraph | NodeValue::Heading(..) | NodeValue::TableCell
471        )
472    }
473
474    /// Return a reference to the text of a `Text` inline, if this node is one.
475    ///
476    /// Convenience method.
477    pub fn text(&self) -> Option<&String> {
478        match *self {
479            NodeValue::Text(ref t) => Some(t),
480            _ => None,
481        }
482    }
483
484    /// Return a mutable reference to the text of a `Text` inline, if this node is one.
485    ///
486    /// Convenience method.
487    pub fn text_mut(&mut self) -> Option<&mut String> {
488        match *self {
489            NodeValue::Text(ref mut t) => Some(t),
490            _ => None,
491        }
492    }
493
494    pub(crate) fn accepts_lines(&self) -> bool {
495        matches!(
496            *self,
497            NodeValue::Paragraph | NodeValue::Heading(..) | NodeValue::CodeBlock(..)
498        )
499    }
500
501    pub(crate) fn xml_node_name(&self) -> &'static str {
502        match *self {
503            NodeValue::Document => "document",
504            NodeValue::BlockQuote => "block_quote",
505            NodeValue::FootnoteDefinition(_) => "footnote_definition",
506            NodeValue::List(..) => "list",
507            NodeValue::DescriptionList => "description_list",
508            NodeValue::DescriptionItem(_) => "description_item",
509            NodeValue::DescriptionTerm => "description_term",
510            NodeValue::DescriptionDetails => "description_details",
511            NodeValue::Item(..) => "item",
512            NodeValue::CodeBlock(..) => "code_block",
513            NodeValue::HtmlBlock(..) => "html_block",
514            NodeValue::Paragraph => "paragraph",
515            NodeValue::Heading(..) => "heading",
516            NodeValue::ThematicBreak => "thematic_break",
517            NodeValue::Table(..) => "table",
518            NodeValue::TableRow(..) => "table_row",
519            NodeValue::TableCell => "table_cell",
520            NodeValue::Text(..) => "text",
521            NodeValue::SoftBreak => "softbreak",
522            NodeValue::LineBreak => "linebreak",
523            NodeValue::Image(..) => "image",
524            NodeValue::Link(..) => "link",
525            NodeValue::Emph => "emph",
526            NodeValue::Strong => "strong",
527            NodeValue::Code(..) => "code",
528            NodeValue::HtmlInline(..) => "html_inline",
529            NodeValue::Raw(..) => "raw",
530            NodeValue::Strikethrough => "strikethrough",
531            NodeValue::FrontMatter(_) => "frontmatter",
532            NodeValue::TaskItem { .. } => "taskitem",
533            NodeValue::Superscript => "superscript",
534            NodeValue::FootnoteReference(..) => "footnote_reference",
535            #[cfg(feature = "shortcodes")]
536            NodeValue::ShortCode(_) => "shortcode",
537            NodeValue::MultilineBlockQuote(_) => "multiline_block_quote",
538            NodeValue::Escaped => "escaped",
539            NodeValue::Math(..) => "math",
540            NodeValue::WikiLink(..) => "wikilink",
541            NodeValue::Underline => "underline",
542            NodeValue::Subscript => "subscript",
543            NodeValue::SpoileredText => "spoiler",
544            NodeValue::EscapedTag(_) => "escaped_tag",
545            NodeValue::Alert(_) => "alert",
546        }
547    }
548}
549
/// A single node in the CommonMark AST.
///
/// The struct contains metadata about the node's position in the original document, and the core
/// enum, `NodeValue`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Ast {
    /// The node value itself.
    pub value: NodeValue,

    /// The positions in the source document this node comes from.
    pub sourcepos: Sourcepos,
    // NOTE(review): not used in this file; presumably a parser-internal offset — confirm.
    pub(crate) internal_offset: usize,

    // NOTE(review): not used in this file; presumably text accumulated while parsing — confirm.
    pub(crate) content: String,
    // Whether the node is still open. `Ast::new` starts nodes open, and
    // `last_child_is_open` reads this flag.
    pub(crate) open: bool,
    // Whether the node's last line was blank; read by `ends_with_blank_line`.
    pub(crate) last_line_blank: bool,
    // NOTE(review): not used in this file; presumably set by the table extension — confirm.
    pub(crate) table_visited: bool,
    // NOTE(review): not used in this file; presumably per-line offsets recorded by the
    // parser — confirm.
    pub(crate) line_offsets: Vec<usize>,
}
569
/// Represents the position in the source Markdown this node was rendered from.
///
/// The derived ordering compares `start` first and then `end`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Sourcepos {
    /// The line and column of the first character of this node.
    pub start: LineColumn,
    /// The line and column of the last character of this node.
    pub end: LineColumn,
}
578
579impl std::fmt::Display for Sourcepos {
580    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
581        write!(
582            f,
583            "{}:{}-{}:{}",
584            self.start.line, self.start.column, self.end.line, self.end.column,
585        )
586    }
587}
588
589impl From<(usize, usize, usize, usize)> for Sourcepos {
590    fn from(sp: (usize, usize, usize, usize)) -> Sourcepos {
591        Sourcepos {
592            start: LineColumn {
593                line: sp.0,
594                column: sp.1,
595            },
596            end: LineColumn {
597                line: sp.2,
598                column: sp.3,
599            },
600        }
601    }
602}
603
/// Represents the 1-based line and column positions of a given character.
///
/// The derived ordering compares `line` first and then `column`. Note that the derived
/// `Default` value has both fields set to `0`, which is outside the 1-based range.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct LineColumn {
    /// The 1-based line number of the character.
    pub line: usize,
    /// The 1-based column number of the character.
    pub column: usize,
}
612
613impl From<(usize, usize)> for LineColumn {
614    fn from(lc: (usize, usize)) -> LineColumn {
615        LineColumn {
616            line: lc.0,
617            column: lc.1,
618        }
619    }
620}
621
622impl LineColumn {
623    /// Return a new LineColumn based on this one, with the column adjusted by offset.
624    pub fn column_add(&self, offset: isize) -> LineColumn {
625        LineColumn {
626            line: self.line,
627            column: usize::try_from((self.column as isize) + offset).unwrap(),
628        }
629    }
630}
631
632impl Ast {
633    /// Create a new AST node with the given value.
634    pub fn new(value: NodeValue, start: LineColumn) -> Self {
635        Ast {
636            value,
637            content: String::new(),
638            sourcepos: (start.line, start.column, start.line, 0).into(),
639            internal_offset: 0,
640            open: true,
641            last_line_blank: false,
642            table_visited: false,
643            line_offsets: Vec::with_capacity(0),
644        }
645    }
646}
647
/// The type of a node within the document.
///
/// It is bound by the lifetime `'a`, which corresponds to the `Arena` nodes are
/// allocated in. Each node's `Ast` is wrapped in a `RefCell` for interior mutability.
///
/// You can construct a new `AstNode` from a `NodeValue` using the `From` trait:
///
/// ```no_run
/// # use comrak::nodes::{AstNode, NodeValue};
/// let root = AstNode::from(NodeValue::Document);
/// ```
///
/// Note that no sourcepos information is given to the created node. If you wish
/// to assign sourcepos information, use the `From` trait to create an `AstNode`
/// from an `Ast`:
///
/// ```no_run
/// # use comrak::nodes::{Ast, AstNode, NodeValue};
/// let root = AstNode::from(Ast::new(
///     NodeValue::Paragraph,
///     (4, 1).into(), // start_line, start_col
/// ));
/// ```
///
/// `Ast::new` only takes the start position and leaves the `end` position at column 0 of the
/// start line, so adjust the `end` position manually.
///
/// For practical use, you'll probably need it allocated in an `Arena`, in which
/// case you can use `.into()` to simplify creation:
///
/// ```no_run
/// # use comrak::{nodes::{AstNode, NodeValue}, Arena};
/// # let arena = Arena::<AstNode>::new();
/// let node_in_arena = arena.alloc(NodeValue::Document.into());
/// ```
pub type AstNode<'a> = Node<'a, RefCell<Ast>>;
683
684impl<'a> From<NodeValue> for AstNode<'a> {
685    /// Create a new AST node with the given value. The sourcepos is set to (0,0)-(0,0).
686    fn from(value: NodeValue) -> Self {
687        Node::new(RefCell::new(Ast::new(value, LineColumn::default())))
688    }
689}
690
691impl<'a> From<Ast> for AstNode<'a> {
692    /// Create a new AST node with the given Ast.
693    fn from(ast: Ast) -> Self {
694        Node::new(RefCell::new(ast))
695    }
696}
697
/// Validation errors produced by [Node::validate].
///
/// The error borrows the offending nodes for the lifetime `'a` of the tree they belong to.
#[derive(Debug, Clone)]
pub enum ValidationError<'a> {
    /// The type of a child node is not allowed in the parent node. This can happen when an inline
    /// node is found in a block container, a block is found in an inline node, etc.
    InvalidChildType {
        /// The parent node.
        parent: &'a AstNode<'a>,
        /// The child node.
        child: &'a AstNode<'a>,
    },
}
710
711impl<'a> Node<'a, RefCell<Ast>> {
712    /// The comrak representation of a markdown node in Rust isn't strict enough to rule out
713    /// invalid trees according to the CommonMark specification. One simple example is that block
714    /// containers, such as lists, should only contain blocks, but it's possible to put naked
715    /// inline text in a list item. Such invalid trees can lead comrak to generate incorrect output
716    /// if rendered.
717    ///
718    /// This method performs additional structural checks to ensure that a markdown AST is valid
719    /// according to the CommonMark specification.
720    ///
721    /// Note that those invalid trees can only be generated programmatically. Parsing markdown with
722    /// comrak, on the other hand, should always produce a valid tree.
723    pub fn validate(&'a self) -> Result<(), ValidationError<'a>> {
724        let mut stack = vec![self];
725
726        while let Some(node) = stack.pop() {
727            // Check that this node type is valid wrt to the type of its parent.
728            if let Some(parent) = node.parent() {
729                if !can_contain_type(parent, &node.data.borrow().value) {
730                    return Err(ValidationError::InvalidChildType {
731                        parent,
732                        child: node,
733                    });
734                }
735            }
736
737            stack.extend(node.children());
738        }
739
740        Ok(())
741    }
742}
743
744pub(crate) fn last_child_is_open<'a>(node: &'a AstNode<'a>) -> bool {
745    node.last_child().map_or(false, |n| n.data.borrow().open)
746}
747
748/// Returns true if the given node can contain a node with the given value.
749pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool {
750    match *child {
751        NodeValue::Document => {
752            return false;
753        }
754        NodeValue::FrontMatter(_) => {
755            return matches!(node.data.borrow().value, NodeValue::Document);
756        }
757        _ => {}
758    }
759
760    match node.data.borrow().value {
761        NodeValue::Document
762        | NodeValue::BlockQuote
763        | NodeValue::FootnoteDefinition(_)
764        | NodeValue::DescriptionTerm
765        | NodeValue::DescriptionDetails
766        | NodeValue::Item(..)
767        | NodeValue::TaskItem(..) => {
768            child.block() && !matches!(*child, NodeValue::Item(..) | NodeValue::TaskItem(..))
769        }
770
771        NodeValue::List(..) => matches!(*child, NodeValue::Item(..) | NodeValue::TaskItem(..)),
772
773        NodeValue::DescriptionList => matches!(*child, NodeValue::DescriptionItem(_)),
774
775        NodeValue::DescriptionItem(_) => matches!(
776            *child,
777            NodeValue::DescriptionTerm | NodeValue::DescriptionDetails
778        ),
779
780        #[cfg(feature = "shortcodes")]
781        NodeValue::ShortCode(..) => !child.block(),
782
783        NodeValue::Paragraph
784        | NodeValue::Heading(..)
785        | NodeValue::Emph
786        | NodeValue::Strong
787        | NodeValue::Link(..)
788        | NodeValue::Image(..)
789        | NodeValue::WikiLink(..)
790        | NodeValue::Strikethrough
791        | NodeValue::Superscript
792        | NodeValue::SpoileredText
793        | NodeValue::Underline
794        | NodeValue::Subscript
795        // XXX: this is quite a hack: the EscapedTag _contains_ whatever was
796        // possibly going to fall into the spoiler. This should be fixed in
797        // inlines.
798        | NodeValue::EscapedTag(_)
799        => !child.block(),
800
801        NodeValue::Table(..) => matches!(*child, NodeValue::TableRow(..)),
802
803        NodeValue::TableRow(..) => matches!(*child, NodeValue::TableCell),
804
805        #[cfg(not(feature = "shortcodes"))]
806        NodeValue::TableCell => matches!(
807            *child,
808            NodeValue::Text(..)
809                | NodeValue::Code(..)
810                | NodeValue::Emph
811                | NodeValue::Strong
812                | NodeValue::Link(..)
813                | NodeValue::Image(..)
814                | NodeValue::Strikethrough
815                | NodeValue::HtmlInline(..)
816                | NodeValue::Math(..)
817                | NodeValue::WikiLink(..)
818                | NodeValue::FootnoteReference(..)
819                | NodeValue::Superscript
820                | NodeValue::SpoileredText
821                | NodeValue::Underline
822                | NodeValue::Subscript
823        ),
824
825        #[cfg(feature = "shortcodes")]
826        NodeValue::TableCell => matches!(
827            *child,
828            NodeValue::Text(..)
829            | NodeValue::Code(..)
830            | NodeValue::Emph
831            | NodeValue::Strong
832            | NodeValue::Link(..)
833            | NodeValue::Image(..)
834            | NodeValue::Strikethrough
835            | NodeValue::HtmlInline(..)
836            | NodeValue::Math(..)
837            | NodeValue::WikiLink(..)
838            | NodeValue::FootnoteReference(..)
839            | NodeValue::Superscript
840            | NodeValue::SpoileredText
841            | NodeValue::Underline
842            | NodeValue::Subscript
843            | NodeValue::ShortCode(..)
844        ),
845
846        NodeValue::MultilineBlockQuote(_) => {
847            child.block() && !matches!(*child, NodeValue::Item(..) | NodeValue::TaskItem(..))
848        }
849
850        NodeValue::Alert(_) => {
851            child.block() && !matches!(*child, NodeValue::Item(..) | NodeValue::TaskItem(..))
852        }
853        _ => false,
854    }
855}
856
857pub(crate) fn ends_with_blank_line<'a>(node: &'a AstNode<'a>) -> bool {
858    let mut it = Some(node);
859    while let Some(cur) = it {
860        if cur.data.borrow().last_line_blank {
861            return true;
862        }
863        match cur.data.borrow().value {
864            NodeValue::List(..) | NodeValue::Item(..) | NodeValue::TaskItem(..) => {
865                it = cur.last_child()
866            }
867            _ => it = None,
868        };
869    }
870    false
871}
872
873pub(crate) fn containing_block<'a>(node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>> {
874    let mut ch = Some(node);
875    while let Some(n) = ch {
876        if n.data.borrow().value.block() {
877            return Some(n);
878        }
879        ch = n.parent();
880    }
881    None
882}