comrak/nodes.rs
1//! The CommonMark AST.
2
3use crate::arena_tree::Node;
4use std::cell::RefCell;
5use std::convert::TryFrom;
6
7#[cfg(feature = "shortcodes")]
8pub use crate::parser::shortcodes::NodeShortCode;
9
10pub use crate::parser::alert::{AlertType, NodeAlert};
11pub use crate::parser::math::NodeMath;
12pub use crate::parser::multiline_block_quote::NodeMultilineBlockQuote;
13
14/// The core AST node enum.
15#[derive(Debug, Clone, PartialEq, Eq)]
16#[cfg_attr(test, derive(strum::EnumDiscriminants))]
17#[cfg_attr(
18 test,
19 strum_discriminants(vis(pub(crate)), derive(strum::VariantArray, Hash))
20)]
21pub enum NodeValue {
22 /// The root of every CommonMark document. Contains **blocks**.
23 Document,
24
25 /// Non-Markdown front matter. Treated as an opaque blob.
26 FrontMatter(String),
27
28 /// **Block**. A [block quote](https://github.github.com/gfm/#block-quotes). Contains other
29 /// **blocks**.
30 ///
31 /// ``` md
32 /// > A block quote.
33 /// ```
34 BlockQuote,
35
36 /// **Block**. A [list](https://github.github.com/gfm/#lists). Contains
37 /// [list items](https://github.github.com/gfm/#list-items).
38 ///
39 /// ``` md
40 /// * An unordered list
41 /// * Another item
42 ///
43 /// 1. An ordered list
44 /// 2. Another item
45 /// ```
46 List(NodeList),
47
48 /// **Block**. A [list item](https://github.github.com/gfm/#list-items). Contains other
49 /// **blocks**.
50 Item(NodeList),
51
52 /// **Block**. A description list, enabled with `ext_description_lists` option. Contains
53 /// description items.
54 ///
55 /// It is required to put a blank line between terms and details.
56 ///
57 /// ``` md
58 /// Term 1
59 ///
60 /// : Details 1
61 ///
62 /// Term 2
63 ///
64 /// : Details 2
65 /// ```
66 DescriptionList,
67
68 /// *Block**. An item of a description list. Contains a term and one details block.
69 DescriptionItem(NodeDescriptionItem),
70
71 /// **Block**. Term of an item in a definition list.
72 DescriptionTerm,
73
74 /// **Block**. Details of an item in a definition list.
75 DescriptionDetails,
76
77 /// **Block**. A code block; may be [fenced](https://github.github.com/gfm/#fenced-code-blocks)
78 /// or [indented](https://github.github.com/gfm/#indented-code-blocks). Contains raw text
79 /// which is not parsed as Markdown, although is HTML escaped.
80 CodeBlock(NodeCodeBlock),
81
82 /// **Block**. A [HTML block](https://github.github.com/gfm/#html-blocks). Contains raw text
83 /// which is neither parsed as Markdown nor HTML escaped.
84 HtmlBlock(NodeHtmlBlock),
85
86 /// **Block**. A [paragraph](https://github.github.com/gfm/#paragraphs). Contains **inlines**.
87 Paragraph,
88
89 /// **Block**. A heading; may be an [ATX heading](https://github.github.com/gfm/#atx-headings)
90 /// or a [setext heading](https://github.github.com/gfm/#setext-headings). Contains
91 /// **inlines**.
92 Heading(NodeHeading),
93
94 /// **Block**. A [thematic break](https://github.github.com/gfm/#thematic-breaks). Has no
95 /// children.
96 ThematicBreak,
97
98 /// **Block**. A footnote definition. The `String` is the footnote's name.
99 /// Contains other **blocks**.
100 FootnoteDefinition(NodeFootnoteDefinition),
101
102 /// **Block**. A [table](https://github.github.com/gfm/#tables-extension-) per the GFM spec.
103 /// Contains table rows.
104 Table(NodeTable),
105
106 /// **Block**. A table row. The `bool` represents whether the row is the header row or not.
107 /// Contains table cells.
108 TableRow(bool),
109
110 /// **Block**. A table cell. Contains **inlines**.
111 TableCell,
112
113 /// **Inline**. [Textual content](https://github.github.com/gfm/#textual-content). All text
114 /// in a document will be contained in a `Text` node.
115 Text(String),
116
117 /// **Block**. [Task list item](https://github.github.com/gfm/#task-list-items-extension-).
118 /// The value is the symbol that was used in the brackets to mark a task item as checked, or
119 /// None if the item is unchecked.
120 TaskItem(Option<char>),
121
122 /// **Inline**. A [soft line break](https://github.github.com/gfm/#soft-line-breaks). If
123 /// the `hardbreaks` option is set in `Options` during formatting, it will be formatted
124 /// as a `LineBreak`.
125 SoftBreak,
126
127 /// **Inline**. A [hard line break](https://github.github.com/gfm/#hard-line-breaks).
128 LineBreak,
129
130 /// **Inline**. A [code span](https://github.github.com/gfm/#code-spans).
131 Code(NodeCode),
132
133 /// **Inline**. [Raw HTML](https://github.github.com/gfm/#raw-html) contained inline.
134 HtmlInline(String),
135
136 /// **Block/Inline**. A Raw output node. This will be inserted verbatim into CommonMark and
137 /// HTML output. It can only be created programmatically, and is never parsed from input.
138 Raw(String),
139
140 /// **Inline**. [Emphasized](https://github.github.com/gfm/#emphasis-and-strong-emphasis)
141 /// text.
142 Emph,
143
144 /// **Inline**. [Strong](https://github.github.com/gfm/#emphasis-and-strong-emphasis) text.
145 Strong,
146
147 /// **Inline**. [Strikethrough](https://github.github.com/gfm/#strikethrough-extension-) text
148 /// per the GFM spec.
149 Strikethrough,
150
151 /// **Inline**. Superscript. Enabled with `ext_superscript` option.
152 Superscript,
153
154 /// **Inline**. A [link](https://github.github.com/gfm/#links) to some URL, with possible
155 /// title.
156 Link(NodeLink),
157
158 /// **Inline**. An [image](https://github.github.com/gfm/#images).
159 Image(NodeLink),
160
161 /// **Inline**. A footnote reference.
162 FootnoteReference(NodeFootnoteReference),
163
164 #[cfg(feature = "shortcodes")]
165 /// **Inline**. An Emoji character generated from a shortcode. Enable with feature "shortcodes".
166 ShortCode(NodeShortCode),
167
168 /// **Inline**. A math span. Contains raw text which is not parsed as Markdown.
169 /// Dollar math or code math
170 ///
171 /// Inline math $1 + 2$ and $`1 + 2`$
172 ///
173 /// Display math $$1 + 2$$ and
174 /// $$
175 /// 1 + 2
176 /// $$
177 ///
178 Math(NodeMath),
179
180 /// **Block**. A [multiline block quote](https://github.github.com/gfm/#block-quotes). Spans multiple
181 /// lines and contains other **blocks**.
182 ///
183 /// ``` md
184 /// >>>
185 /// A paragraph.
186 ///
187 /// - item one
188 /// - item two
189 /// >>>
190 /// ```
191 MultilineBlockQuote(NodeMultilineBlockQuote),
192
193 /// **Inline**. A character that has been [escaped](https://github.github.com/gfm/#backslash-escapes)
194 ///
195 /// Enabled with [`escaped_char_spans`](crate::RenderOptionsBuilder::escaped_char_spans).
196 Escaped,
197
198 /// **Inline**. A wikilink to some URL.
199 WikiLink(NodeWikiLink),
200
201 /// **Inline**. Underline. Enabled with `underline` option.
202 Underline,
203
204 /// **Inline**. Subscript. Enabled with `subscript` options.
205 Subscript,
206
207 /// **Inline**. Spoilered text. Enabled with `spoiler` option.
208 SpoileredText,
209
210 /// **Inline**. Text surrounded by escaped markup. Enabled with `spoiler` option.
211 /// The `String` is the tag to be escaped.
212 EscapedTag(String),
213
214 /// **Block**. GitHub style alert boxes which uses a modified blockquote syntax.
215 /// Enabled with the `alerts` option.
216 Alert(NodeAlert),
217}
218
/// How the content of one table cell is aligned horizontally.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TableAlignment {
    /// No alignment was specified for the cell.
    None,

    /// The cell's content is left-aligned.
    Left,

    /// The cell's content is centered.
    Center,

    /// The cell's content is right-aligned.
    Right,
}
234
235impl TableAlignment {
236 pub(crate) fn xml_name(&self) -> Option<&'static str> {
237 match *self {
238 TableAlignment::None => None,
239 TableAlignment::Left => Some("left"),
240 TableAlignment::Center => Some("center"),
241 TableAlignment::Right => Some("right"),
242 }
243 }
244}
245
246/// The metadata of a table
247#[derive(Debug, Default, Clone, PartialEq, Eq)]
248pub struct NodeTable {
249 /// The table alignments
250 pub alignments: Vec<TableAlignment>,
251
252 /// Number of columns of the table
253 pub num_columns: usize,
254
255 /// Number of rows of the table
256 pub num_rows: usize,
257
258 /// Number of non-empty, non-autocompleted cells
259 pub num_nonempty_cells: usize,
260}
261
/// The data of an inline [code span](https://github.github.com/gfm/#code-spans).
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct NodeCode {
    /// How many backticks delimit the span.
    pub num_backticks: usize,

    /// The text inside the code span.
    ///
    /// Because it is never interpreted as Markdown, the text is stored directly in this
    /// structure instead of being placed in a child inline node.
    pub literal: String,
}
274
/// Where a link points to, or where an image is loaded from, plus its title.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct NodeLink {
    /// The link destination URL, or the image source URL.
    pub url: String,

    /// The title of the link or image.
    ///
    /// The HTML formatter emits this as the `title` attribute, including for images; an
    /// image's `alt` text comes from the image's inline text instead.
    pub title: String,
}
287
/// Where a wikilink points to.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct NodeWikiLink {
    /// The link destination URL.
    pub url: String,
}
294
295/// The metadata of a list; the kind of list, the delimiter used and so on.
296#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
297pub struct NodeList {
298 /// The kind of list (bullet (unordered) or ordered).
299 pub list_type: ListType,
300
301 /// Number of spaces before the list marker.
302 pub marker_offset: usize,
303
304 /// Number of characters between the start of the list marker and the item text (including the list marker(s)).
305 pub padding: usize,
306
307 /// For ordered lists, the ordinal the list starts at.
308 pub start: usize,
309
310 /// For ordered lists, the delimiter after each number.
311 pub delimiter: ListDelimType,
312
313 /// For bullet lists, the character used for each bullet.
314 pub bullet_char: u8,
315
316 /// Whether the list is [tight](https://github.github.com/gfm/#tight), i.e. whether the
317 /// paragraphs are wrapped in `<p>` tags when formatted as HTML.
318 pub tight: bool,
319
320 /// Whether the list contains tasks (checkbox items)
321 pub is_task_list: bool,
322}
323
/// Metadata attached to an item of a description list.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct NodeDescriptionItem {
    /// How many spaces precede the list marker.
    pub marker_offset: usize,

    /// How many characters lie between the start of the list marker and the item text,
    /// counting the list marker(s) themselves.
    pub padding: usize,

    /// Whether the list is [tight](https://github.github.com/gfm/#tight), i.e. whether the
    /// paragraphs are wrapped in `<p>` tags when formatted as HTML.
    pub tight: bool,
}
337
/// The kind of a list.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ListType {
    /// An unordered (bullet) list.  This is the default.
    #[default]
    Bullet,

    /// An ordered list.
    Ordered,
}
348
/// The character that follows each number of an ordered list.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ListDelimType {
    /// The period character `.`.  This is the default.
    #[default]
    Period,

    /// The closing paren character `)`.
    Paren,
}
359
360impl ListDelimType {
361 pub(crate) fn xml_name(&self) -> &'static str {
362 match *self {
363 ListDelimType::Period => "period",
364 ListDelimType::Paren => "paren",
365 }
366 }
367}
368
/// Metadata and contents of a code block, whether fenced or indented.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct NodeCodeBlock {
    /// `true` when the code block is fenced.
    pub fenced: bool,

    /// The fence character (`` ` `` or `~`) of a fenced code block.
    pub fence_char: u8,

    /// The length of the fence of a fenced code block.
    pub fence_length: usize,

    /// The indentation level of the code within a fenced code block.
    pub fence_offset: usize,

    /// The [info string](https://github.github.com/gfm/#info-string) that follows the opening
    /// fence of a fenced code block, if any.
    pub info: String,

    /// The literal contents of the code block.
    ///
    /// Because the contents are never interpreted as Markdown, they are stored directly in
    /// this structure instead of being placed in a child inline node.
    pub literal: String,
}
393
/// Metadata attached to a heading node.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct NodeHeading {
    /// The heading level: 1 to 6 for ATX headings, 1 or 2 for setext headings.
    pub level: u8,

    /// `true` for a setext heading, `false` for an ATX heading.
    pub setext: bool,
}
403
/// Metadata and contents of an included HTML block.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct NodeHtmlBlock {
    /// The type of the HTML block.
    pub block_type: u8,

    /// The literal contents of the HTML block.  As with `NodeCodeBlock`, the content is stored
    /// here rather than in any inline.
    pub literal: String,
}
414
/// Metadata attached to a footnote definition.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct NodeFootnoteDefinition {
    /// The footnote's name.
    pub name: String,

    /// How many references to this footnote exist in total.
    pub total_references: u32,
}
424
/// Metadata attached to a footnote reference.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct NodeFootnoteReference {
    /// The footnote's name.
    pub name: String,

    /// The index of this reference among the references to the same footnote.
    pub ref_num: u32,

    /// The index of the footnote within the document.
    pub ix: u32,
}
437
438impl NodeValue {
439 /// Indicates whether this node is a block node or inline node.
440 pub fn block(&self) -> bool {
441 matches!(
442 *self,
443 NodeValue::Document
444 | NodeValue::BlockQuote
445 | NodeValue::FootnoteDefinition(_)
446 | NodeValue::List(..)
447 | NodeValue::DescriptionList
448 | NodeValue::DescriptionItem(_)
449 | NodeValue::DescriptionTerm
450 | NodeValue::DescriptionDetails
451 | NodeValue::Item(..)
452 | NodeValue::CodeBlock(..)
453 | NodeValue::HtmlBlock(..)
454 | NodeValue::Paragraph
455 | NodeValue::Heading(..)
456 | NodeValue::ThematicBreak
457 | NodeValue::Table(..)
458 | NodeValue::TableRow(..)
459 | NodeValue::TableCell
460 | NodeValue::TaskItem(..)
461 | NodeValue::MultilineBlockQuote(_)
462 | NodeValue::Alert(_)
463 )
464 }
465
466 /// Whether the type the node is of can contain inline nodes.
467 pub fn contains_inlines(&self) -> bool {
468 matches!(
469 *self,
470 NodeValue::Paragraph | NodeValue::Heading(..) | NodeValue::TableCell
471 )
472 }
473
474 /// Return a reference to the text of a `Text` inline, if this node is one.
475 ///
476 /// Convenience method.
477 pub fn text(&self) -> Option<&String> {
478 match *self {
479 NodeValue::Text(ref t) => Some(t),
480 _ => None,
481 }
482 }
483
484 /// Return a mutable reference to the text of a `Text` inline, if this node is one.
485 ///
486 /// Convenience method.
487 pub fn text_mut(&mut self) -> Option<&mut String> {
488 match *self {
489 NodeValue::Text(ref mut t) => Some(t),
490 _ => None,
491 }
492 }
493
494 pub(crate) fn accepts_lines(&self) -> bool {
495 matches!(
496 *self,
497 NodeValue::Paragraph | NodeValue::Heading(..) | NodeValue::CodeBlock(..)
498 )
499 }
500
501 pub(crate) fn xml_node_name(&self) -> &'static str {
502 match *self {
503 NodeValue::Document => "document",
504 NodeValue::BlockQuote => "block_quote",
505 NodeValue::FootnoteDefinition(_) => "footnote_definition",
506 NodeValue::List(..) => "list",
507 NodeValue::DescriptionList => "description_list",
508 NodeValue::DescriptionItem(_) => "description_item",
509 NodeValue::DescriptionTerm => "description_term",
510 NodeValue::DescriptionDetails => "description_details",
511 NodeValue::Item(..) => "item",
512 NodeValue::CodeBlock(..) => "code_block",
513 NodeValue::HtmlBlock(..) => "html_block",
514 NodeValue::Paragraph => "paragraph",
515 NodeValue::Heading(..) => "heading",
516 NodeValue::ThematicBreak => "thematic_break",
517 NodeValue::Table(..) => "table",
518 NodeValue::TableRow(..) => "table_row",
519 NodeValue::TableCell => "table_cell",
520 NodeValue::Text(..) => "text",
521 NodeValue::SoftBreak => "softbreak",
522 NodeValue::LineBreak => "linebreak",
523 NodeValue::Image(..) => "image",
524 NodeValue::Link(..) => "link",
525 NodeValue::Emph => "emph",
526 NodeValue::Strong => "strong",
527 NodeValue::Code(..) => "code",
528 NodeValue::HtmlInline(..) => "html_inline",
529 NodeValue::Raw(..) => "raw",
530 NodeValue::Strikethrough => "strikethrough",
531 NodeValue::FrontMatter(_) => "frontmatter",
532 NodeValue::TaskItem { .. } => "taskitem",
533 NodeValue::Superscript => "superscript",
534 NodeValue::FootnoteReference(..) => "footnote_reference",
535 #[cfg(feature = "shortcodes")]
536 NodeValue::ShortCode(_) => "shortcode",
537 NodeValue::MultilineBlockQuote(_) => "multiline_block_quote",
538 NodeValue::Escaped => "escaped",
539 NodeValue::Math(..) => "math",
540 NodeValue::WikiLink(..) => "wikilink",
541 NodeValue::Underline => "underline",
542 NodeValue::Subscript => "subscript",
543 NodeValue::SpoileredText => "spoiler",
544 NodeValue::EscapedTag(_) => "escaped_tag",
545 NodeValue::Alert(_) => "alert",
546 }
547 }
548}
549
550/// A single node in the CommonMark AST.
551///
552/// The struct contains metadata about the node's position in the original document, and the core
553/// enum, `NodeValue`.
554#[derive(Debug, Clone, PartialEq, Eq)]
555pub struct Ast {
556 /// The node value itself.
557 pub value: NodeValue,
558
559 /// The positions in the source document this node comes from.
560 pub sourcepos: Sourcepos,
561 pub(crate) internal_offset: usize,
562
563 pub(crate) content: String,
564 pub(crate) open: bool,
565 pub(crate) last_line_blank: bool,
566 pub(crate) table_visited: bool,
567 pub(crate) line_offsets: Vec<usize>,
568}
569
570/// Represents the position in the source Markdown this node was rendered from.
571#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
572pub struct Sourcepos {
573 /// The line and column of the first character of this node.
574 pub start: LineColumn,
575 /// The line and column of the last character of this node.
576 pub end: LineColumn,
577}
578
579impl std::fmt::Display for Sourcepos {
580 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
581 write!(
582 f,
583 "{}:{}-{}:{}",
584 self.start.line, self.start.column, self.end.line, self.end.column,
585 )
586 }
587}
588
589impl From<(usize, usize, usize, usize)> for Sourcepos {
590 fn from(sp: (usize, usize, usize, usize)) -> Sourcepos {
591 Sourcepos {
592 start: LineColumn {
593 line: sp.0,
594 column: sp.1,
595 },
596 end: LineColumn {
597 line: sp.2,
598 column: sp.3,
599 },
600 }
601 }
602}
603
/// The 1-based line and column position of a single character.
#[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)]
pub struct LineColumn {
    /// The character's line number, counted from 1.
    pub line: usize,
    /// The character's column number, counted from 1.
    pub column: usize,
}
612
613impl From<(usize, usize)> for LineColumn {
614 fn from(lc: (usize, usize)) -> LineColumn {
615 LineColumn {
616 line: lc.0,
617 column: lc.1,
618 }
619 }
620}
621
622impl LineColumn {
623 /// Return a new LineColumn based on this one, with the column adjusted by offset.
624 pub fn column_add(&self, offset: isize) -> LineColumn {
625 LineColumn {
626 line: self.line,
627 column: usize::try_from((self.column as isize) + offset).unwrap(),
628 }
629 }
630}
631
632impl Ast {
633 /// Create a new AST node with the given value.
634 pub fn new(value: NodeValue, start: LineColumn) -> Self {
635 Ast {
636 value,
637 content: String::new(),
638 sourcepos: (start.line, start.column, start.line, 0).into(),
639 internal_offset: 0,
640 open: true,
641 last_line_blank: false,
642 table_visited: false,
643 line_offsets: Vec::with_capacity(0),
644 }
645 }
646}
647
648/// The type of a node within the document.
649///
650/// It is bound by the lifetime `'a`, which corresponds to the `Arena` nodes are
651/// allocated in. Child `Ast`s are wrapped in `RefCell` for interior mutability.
652///
653/// You can construct a new `AstNode` from a `NodeValue` using the `From` trait:
654///
655/// ```no_run
656/// # use comrak::nodes::{AstNode, NodeValue};
657/// let root = AstNode::from(NodeValue::Document);
658/// ```
659///
660/// Note that no sourcepos information is given to the created node. If you wish
661/// to assign sourcepos information, use the `From` trait to create an `AstNode`
662/// from an `Ast`:
663///
664/// ```no_run
665/// # use comrak::nodes::{Ast, AstNode, NodeValue};
666/// let root = AstNode::from(Ast::new(
667/// NodeValue::Paragraph,
668/// (4, 1).into(), // start_line, start_col
669/// ));
670/// ```
671///
672/// Adjust the `end` position manually.
673///
674/// For practical use, you'll probably need it allocated in an `Arena`, in which
675/// case you can use `.into()` to simplify creation:
676///
677/// ```no_run
678/// # use comrak::{nodes::{AstNode, NodeValue}, Arena};
679/// # let arena = Arena::<AstNode>::new();
680/// let node_in_arena = arena.alloc(NodeValue::Document.into());
681/// ```
682pub type AstNode<'a> = Node<'a, RefCell<Ast>>;
683
684impl<'a> From<NodeValue> for AstNode<'a> {
685 /// Create a new AST node with the given value. The sourcepos is set to (0,0)-(0,0).
686 fn from(value: NodeValue) -> Self {
687 Node::new(RefCell::new(Ast::new(value, LineColumn::default())))
688 }
689}
690
691impl<'a> From<Ast> for AstNode<'a> {
692 /// Create a new AST node with the given Ast.
693 fn from(ast: Ast) -> Self {
694 Node::new(RefCell::new(ast))
695 }
696}
697
698/// Validation errors produced by [Node::validate].
699#[derive(Debug, Clone)]
700pub enum ValidationError<'a> {
701 /// The type of a child node is not allowed in the parent node. This can happen when an inline
702 /// node is found in a block container, a block is found in an inline node, etc.
703 InvalidChildType {
704 /// The parent node.
705 parent: &'a AstNode<'a>,
706 /// The child node.
707 child: &'a AstNode<'a>,
708 },
709}
710
711impl<'a> Node<'a, RefCell<Ast>> {
712 /// The comrak representation of a markdown node in Rust isn't strict enough to rule out
713 /// invalid trees according to the CommonMark specification. One simple example is that block
714 /// containers, such as lists, should only contain blocks, but it's possible to put naked
715 /// inline text in a list item. Such invalid trees can lead comrak to generate incorrect output
716 /// if rendered.
717 ///
718 /// This method performs additional structural checks to ensure that a markdown AST is valid
719 /// according to the CommonMark specification.
720 ///
721 /// Note that those invalid trees can only be generated programmatically. Parsing markdown with
722 /// comrak, on the other hand, should always produce a valid tree.
723 pub fn validate(&'a self) -> Result<(), ValidationError<'a>> {
724 let mut stack = vec![self];
725
726 while let Some(node) = stack.pop() {
727 // Check that this node type is valid wrt to the type of its parent.
728 if let Some(parent) = node.parent() {
729 if !can_contain_type(parent, &node.data.borrow().value) {
730 return Err(ValidationError::InvalidChildType {
731 parent,
732 child: node,
733 });
734 }
735 }
736
737 stack.extend(node.children());
738 }
739
740 Ok(())
741 }
742}
743
744pub(crate) fn last_child_is_open<'a>(node: &'a AstNode<'a>) -> bool {
745 node.last_child().map_or(false, |n| n.data.borrow().open)
746}
747
748/// Returns true if the given node can contain a node with the given value.
749pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool {
750 match *child {
751 NodeValue::Document => {
752 return false;
753 }
754 NodeValue::FrontMatter(_) => {
755 return matches!(node.data.borrow().value, NodeValue::Document);
756 }
757 _ => {}
758 }
759
760 match node.data.borrow().value {
761 NodeValue::Document
762 | NodeValue::BlockQuote
763 | NodeValue::FootnoteDefinition(_)
764 | NodeValue::DescriptionTerm
765 | NodeValue::DescriptionDetails
766 | NodeValue::Item(..)
767 | NodeValue::TaskItem(..) => {
768 child.block() && !matches!(*child, NodeValue::Item(..) | NodeValue::TaskItem(..))
769 }
770
771 NodeValue::List(..) => matches!(*child, NodeValue::Item(..) | NodeValue::TaskItem(..)),
772
773 NodeValue::DescriptionList => matches!(*child, NodeValue::DescriptionItem(_)),
774
775 NodeValue::DescriptionItem(_) => matches!(
776 *child,
777 NodeValue::DescriptionTerm | NodeValue::DescriptionDetails
778 ),
779
780 #[cfg(feature = "shortcodes")]
781 NodeValue::ShortCode(..) => !child.block(),
782
783 NodeValue::Paragraph
784 | NodeValue::Heading(..)
785 | NodeValue::Emph
786 | NodeValue::Strong
787 | NodeValue::Link(..)
788 | NodeValue::Image(..)
789 | NodeValue::WikiLink(..)
790 | NodeValue::Strikethrough
791 | NodeValue::Superscript
792 | NodeValue::SpoileredText
793 | NodeValue::Underline
794 | NodeValue::Subscript
795 // XXX: this is quite a hack: the EscapedTag _contains_ whatever was
796 // possibly going to fall into the spoiler. This should be fixed in
797 // inlines.
798 | NodeValue::EscapedTag(_)
799 => !child.block(),
800
801 NodeValue::Table(..) => matches!(*child, NodeValue::TableRow(..)),
802
803 NodeValue::TableRow(..) => matches!(*child, NodeValue::TableCell),
804
805 #[cfg(not(feature = "shortcodes"))]
806 NodeValue::TableCell => matches!(
807 *child,
808 NodeValue::Text(..)
809 | NodeValue::Code(..)
810 | NodeValue::Emph
811 | NodeValue::Strong
812 | NodeValue::Link(..)
813 | NodeValue::Image(..)
814 | NodeValue::Strikethrough
815 | NodeValue::HtmlInline(..)
816 | NodeValue::Math(..)
817 | NodeValue::WikiLink(..)
818 | NodeValue::FootnoteReference(..)
819 | NodeValue::Superscript
820 | NodeValue::SpoileredText
821 | NodeValue::Underline
822 | NodeValue::Subscript
823 ),
824
825 #[cfg(feature = "shortcodes")]
826 NodeValue::TableCell => matches!(
827 *child,
828 NodeValue::Text(..)
829 | NodeValue::Code(..)
830 | NodeValue::Emph
831 | NodeValue::Strong
832 | NodeValue::Link(..)
833 | NodeValue::Image(..)
834 | NodeValue::Strikethrough
835 | NodeValue::HtmlInline(..)
836 | NodeValue::Math(..)
837 | NodeValue::WikiLink(..)
838 | NodeValue::FootnoteReference(..)
839 | NodeValue::Superscript
840 | NodeValue::SpoileredText
841 | NodeValue::Underline
842 | NodeValue::Subscript
843 | NodeValue::ShortCode(..)
844 ),
845
846 NodeValue::MultilineBlockQuote(_) => {
847 child.block() && !matches!(*child, NodeValue::Item(..) | NodeValue::TaskItem(..))
848 }
849
850 NodeValue::Alert(_) => {
851 child.block() && !matches!(*child, NodeValue::Item(..) | NodeValue::TaskItem(..))
852 }
853 _ => false,
854 }
855}
856
857pub(crate) fn ends_with_blank_line<'a>(node: &'a AstNode<'a>) -> bool {
858 let mut it = Some(node);
859 while let Some(cur) = it {
860 if cur.data.borrow().last_line_blank {
861 return true;
862 }
863 match cur.data.borrow().value {
864 NodeValue::List(..) | NodeValue::Item(..) | NodeValue::TaskItem(..) => {
865 it = cur.last_child()
866 }
867 _ => it = None,
868 };
869 }
870 false
871}
872
873pub(crate) fn containing_block<'a>(node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>> {
874 let mut ch = Some(node);
875 while let Some(n) = ch {
876 if n.data.borrow().value.block() {
877 return Some(n);
878 }
879 ch = n.parent();
880 }
881 None
882}