//! The CommonMark AST.
use arena_tree::Node;
use std::cell::RefCell;
/// The core AST node enum.
#[derive(Debug, Clone)]
pub enum NodeValue {
/// The root of every CommonMark document. Contains **blocks**.
Document,
/// **Block**. A [block quote](https://github.github.com/gfm/#block-quotes). Contains other
/// **blocks**.
///
/// ``` md
/// > A block quote.
/// ```
BlockQuote,
/// **Block**. A [list](https://github.github.com/gfm/#lists). Contains
/// [list items](https://github.github.com/gfm/#list-items).
///
/// ``` md
/// * An unordered list
/// * Another item
///
/// 1. An ordered list
/// 2. Another item
/// ```
List(NodeList),
/// **Block**. A [list item](https://github.github.com/gfm/#list-items). Contains other
/// **blocks**.
Item(NodeList),
/// **Block**. A code block; may be [fenced](https://github.github.com/gfm/#fenced-code-blocks)
/// or [indented](https://github.github.com/gfm/#indented-code-blocks). Contains raw text
/// which is not parsed as Markdown, although is HTML escaped.
CodeBlock(NodeCodeBlock),
/// **Block**. A [HTML block](https://github.github.com/gfm/#html-blocks). Contains raw text
/// which is neither parsed as Markdown nor HTML escaped.
HtmlBlock(NodeHtmlBlock),
/// **Block**. A [paragraph](https://github.github.com/gfm/#paragraphs). Contains **inlines**.
Paragraph,
/// **Block**. A heading; may be an [ATX heading](https://github.github.com/gfm/#atx-headings)
/// or a [setext heading](https://github.github.com/gfm/#setext-headings). Contains
/// **inlines**.
Heading(NodeHeading),
/// **Block**. A [thematic break](https://github.github.com/gfm/#thematic-breaks). Has no
/// children.
ThematicBreak,
/// **Block**. A [table](https://github.github.com/gfm/#tables-extension-) per the GFM spec.
/// Contains table rows.
Table(Vec<TableAlignment>),
/// **Block**. A table row. The `bool` represents whether the row is the header row or not.
/// Contains table cells.
TableRow(bool),
/// **Block**. A table cell. Contains **inlines**.
TableCell,
/// **Inline**. [Textual content](https://github.github.com/gfm/#textual-content). All text
/// in a document will be contained in a `Text` node.
Text(String),
/// **Inline**. A [soft line break](https://github.github.com/gfm/#soft-line-breaks). If
/// the `hardbreaks` option is set in `ComrakOptions` during formatting, it will be formatted
/// as a `LineBreak`.
SoftBreak,
/// **Inline**. A [hard line break](https://github.github.com/gfm/#hard-line-breaks).
LineBreak,
/// **Inline**. A [code span](https://github.github.com/gfm/#code-spans).
Code(String),
/// **Inline**. [Raw HTML](https://github.github.com/gfm/#raw-html) contained inline.
HtmlInline(String),
/// **Inline**. [Emphasised](https://github.github.com/gfm/#emphasis-and-strong-emphasis)
/// text.
Emph,
/// **Inline**. [Strong](https://github.github.com/gfm/#emphasis-and-strong-emphasis) text.
Strong,
/// **Inline**. [Strikethrough](https://github.github.com/gfm/#strikethrough-extension-) text
/// per the GFM spec.
Strikethrough,
/// **Inline**. Superscript. Enabled with `ext_superscript` option.
Superscript,
/// **Inline**. A [link](https://github.github.com/gfm/#links) to some URL, with possible
/// title.
Link(NodeLink),
/// **Inline**. An [image](https://github.github.com/gfm/#images).
Image(NodeLink),
}
/// Alignment of a single table cell.
#[derive(Debug, Copy, Clone)]
pub enum TableAlignment {
/// Cell content is unaligned.
None,
/// Cell content is aligned left.
Left,
/// Cell content is centered.
Center,
/// Cell content is aligned right.
Right,
}
/// The details of a link's destination, or an image's source.
#[derive(Debug, Clone)]
pub struct NodeLink {
/// The URL for the link destination or image source.
pub url: String,
/// The title for the link or image.
///
/// Note this field is used for the `title` attribute by the HTML formatter even for images;
/// `alt` text is supplied in the image inline text.
pub title: String,
}
/// The metadata of a list; the kind of list, the delimiter used and so on.
#[derive(Debug, Default, Clone, Copy)]
pub struct NodeList {
/// The kind of list (bullet (unordered) or ordered).
pub list_type: ListType,
#[doc(hidden)]
pub marker_offset: usize,
#[doc(hidden)]
pub padding: usize,
/// For ordered lists, the ordinal the list starts at.
pub start: usize,
/// For ordered lists, the delimiter after each number.
pub delimiter: ListDelimType,
/// For bullet lists, the character used for each bullet.
pub bullet_char: u8,
/// Whether the list is [tight](https://github.github.com/gfm/#tight), i.e. whether the
/// paragraphs are wrapped in `<p>` tags when formatted as HTML.
pub tight: bool,
}
/// The type of list.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ListType {
/// A bullet list, i.e. an unordered list.
Bullet,
/// An ordered list.
Ordered,
}
impl Default for ListType {
fn default() -> ListType {
ListType::Bullet
}
}
/// The delimiter for ordered lists, i.e. the character which appears after each number.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ListDelimType {
/// A period character `.`.
Period,
/// A paren character `)`.
Paren,
}
impl Default for ListDelimType {
fn default() -> ListDelimType {
ListDelimType::Period
}
}
/// The metadata and data of a code block (fenced or indented).
#[derive(Default, Debug, Clone)]
pub struct NodeCodeBlock {
/// Whether the code block is fenced.
pub fenced: bool,
/// For fenced code blocks, the fence character itself (`` ` `` or `~`).
pub fence_char: u8,
/// For fenced code blocks, the length of the fence.
pub fence_length: usize,
#[doc(hidden)]
pub fence_offset: usize,
/// For fenced code blocks, the [info string](https://github.github.com/gfm/#info-string) after
/// the opening fence, if any.
pub info: String,
/// The literal contents of the code block. As the contents are not interpreted as Markdown at
/// all, they are contained within this structure, rather than inserted into a child inline of
/// any kind.
pub literal: String,
}
/// The metadata of a heading.
#[derive(Default, Debug, Clone, Copy)]
pub struct NodeHeading {
/// The level of the header; from 1 to 6 for ATX headings, 1 or 2 for setext headings.
pub level: u32,
/// Whether the heading is setext (if not, ATX).
pub setext: bool,
}
/// The metadata of an included HTML block.
#[derive(Debug, Clone)]
pub struct NodeHtmlBlock {
#[doc(hidden)]
pub block_type: u8,
/// The literal contents of the HTML block. Per NodeCodeBlock, the content is included here
/// rather than in any inline.
pub literal: String,
}
impl NodeValue {
/// Indicates whether this node is a block node or inline node.
pub fn block(&self) -> bool {
match *self {
NodeValue::Document |
NodeValue::BlockQuote |
NodeValue::List(..) |
NodeValue::Item(..) |
NodeValue::CodeBlock(..) |
NodeValue::HtmlBlock(..) |
NodeValue::Paragraph |
NodeValue::Heading(..) |
NodeValue::ThematicBreak |
NodeValue::Table(..) |
NodeValue::TableRow(..) |
NodeValue::TableCell => true,
_ => false,
}
}
#[doc(hidden)]
pub fn accepts_lines(&self) -> bool {
match *self {
NodeValue::Paragraph |
NodeValue::Heading(..) |
NodeValue::CodeBlock(..) => true,
_ => false,
}
}
/// Indicates whether this node may contain inlines.
pub fn contains_inlines(&self) -> bool {
match *self {
NodeValue::Paragraph |
NodeValue::Heading(..) |
NodeValue::TableCell => true,
_ => false,
}
}
/// Return a reference to the text of a `Text` inline, if this node is one.
///
/// Convenience method.
pub fn text(&self) -> Option<&String> {
match *self {
NodeValue::Text(ref t) => Some(t),
_ => None,
}
}
/// Return a mutable reference to the text of a `Text` inline, if this node is one.
///
/// Convenience method.
pub fn text_mut(&mut self) -> Option<&mut String> {
match *self {
NodeValue::Text(ref mut t) => Some(t),
_ => None,
}
}
}
/// A single node in the CommonMark AST.
///
/// The struct contains metadata about the node's position in the original document, and the core
/// enum, `NodeValue`.
#[derive(Debug, Clone)]
pub struct Ast {
/// The node value itself.
pub value: NodeValue,
/// The line in the input document the node starts at.
pub start_line: u32,
/// The column in the input document the node starts at.
pub start_column: usize,
/// The line in the input document the node ends at.
pub end_line: u32,
/// The column in the input document the node ends at.
pub end_column: usize,
#[doc(hidden)]
pub content: String,
#[doc(hidden)]
pub open: bool,
#[doc(hidden)]
pub last_line_blank: bool,
}
#[doc(hidden)]
pub fn make_block(value: NodeValue, start_line: u32, start_column: usize) -> Ast {
Ast {
value: value,
content: String::new(),
start_line: start_line,
start_column: start_column,
end_line: start_line,
end_column: 0,
open: true,
last_line_blank: false,
}
}
/// The type of a node within the document.
///
/// It is bound by the lifetime `'a`, which corresponds to the `Arena` nodes are allocated in.
/// `AstNode`s are almost handled as a reference itself bound by `'a`. Child `Ast`s are wrapped in
/// `RefCell` for interior mutability.
pub type AstNode<'a> = Node<'a, RefCell<Ast>>;
#[doc(hidden)]
pub fn last_child_is_open<'a>(node: &'a AstNode<'a>) -> bool {
node.last_child().map_or(false, |n| n.data.borrow().open)
}
#[doc(hidden)]
pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool {
if let NodeValue::Document = *child {
return false;
}
match node.data.borrow().value {
NodeValue::Document |
NodeValue::BlockQuote |
NodeValue::Item(..) => {
child.block() &&
match *child {
NodeValue::Item(..) => false,
_ => true,
}
}
NodeValue::List(..) => {
match *child {
NodeValue::Item(..) => true,
_ => false,
}
}
NodeValue::Paragraph |
NodeValue::Heading(..) |
NodeValue::Emph |
NodeValue::Strong |
NodeValue::Link(..) |
NodeValue::Image(..) => !child.block(),
NodeValue::Table(..) => {
match *child {
NodeValue::TableRow(..) => true,
_ => false,
}
}
NodeValue::TableRow(..) => {
match *child {
NodeValue::TableCell => true,
_ => false,
}
}
NodeValue::TableCell => {
match *child {
NodeValue::Text(..) |
NodeValue::Code(..) |
NodeValue::Emph |
NodeValue::Strong |
NodeValue::Link(..) |
NodeValue::Image(..) |
NodeValue::Strikethrough |
NodeValue::HtmlInline(..) => true,
_ => false,
}
}
_ => false,
}
}
#[doc(hidden)]
pub fn ends_with_blank_line<'a>(node: &'a AstNode<'a>) -> bool {
let mut it = Some(node);
while let Some(cur) = it {
if cur.data.borrow().last_line_blank {
return true;
}
match cur.data.borrow().value {
NodeValue::List(..) |
NodeValue::Item(..) => it = cur.last_child(),
_ => it = None,
};
}
false
}
#[doc(hidden)]
pub fn containing_block<'a>(node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>> {
let mut ch = Some(node);
while let Some(n) = ch {
if n.data.borrow().value.block() {
return Some(n);
}
ch = n.parent();
}
None
}