markdown_ppp/ast/
mod.rs

//! Fully‑typed Abstract Syntax Tree (AST) for CommonMark + GitHub Flavored Markdown (GFM)
//! ------------------------------------------------------------------------------------
//! This module models every construct described in the **CommonMark 1.0 specification**
//! together with the widely‑used **GFM extensions**: tables, strikethrough, autolinks,
//! task‑list items and footnotes.
//!
//! The design separates **block‑level** and **inline‑level** nodes because parsers and
//! renderers typically operate on these tiers independently.
//!
//! ```text
//! Document ─┐
//!           └─ Block ─┐
//!                     ├─ Inline
//!                     └─ ...
//! ```
16
// ——————————————————————————————————————————————————————————————————————————
// Document root
// ——————————————————————————————————————————————————————————————————————————
20
21/// Root of a Markdown document
22#[derive(Debug, Clone, PartialEq)]
23#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
24pub struct Document {
25    /// Top‑level block sequence **in document order**.
26    pub blocks: Vec<Block>,
27}
28
// ——————————————————————————————————————————————————————————————————————————
// Block‑level nodes
// ——————————————————————————————————————————————————————————————————————————
32
33/// Block‑level constructs in the order they appear in the CommonMark spec.
34#[derive(Debug, Clone, PartialEq)]
35#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
36pub enum Block {
37    /// Ordinary paragraph
38    Paragraph(Vec<Inline>),
39
40    /// ATX (`# Heading`) or Setext (`===`) heading
41    Heading(Heading),
42
43    /// Thematic break (horizontal rule)
44    ThematicBreak,
45
46    /// Block quote
47    BlockQuote(Vec<Block>),
48
49    /// List (bullet or ordered)
50    List(List),
51
52    /// Fenced or indented code block
53    CodeBlock(CodeBlock),
54
55    /// Raw HTML block
56    HtmlBlock(String),
57
58    /// Link reference definition.  Preserved for round‑tripping.
59    Definition(LinkDefinition),
60
61    /// Tables
62    Table(Table),
63
64    /// Footnote definition
65    FootnoteDefinition(FootnoteDefinition),
66
67    /// Empty block. This is used to represent skipped blocks in the AST.
68    Empty,
69}
70
71/// Heading with level 1–6 and inline content.
72#[derive(Debug, Clone, PartialEq)]
73#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
74pub struct Heading {
75    /// Heading level (1 ⇒ `<h1>`, …, 6 ⇒ `<h6>`).
76    pub level: u8,
77
78    /// Inlines that form the heading text (before trimming).
79    pub content: Vec<Inline>,
80}
81
// ——————————————————————————————————————————————————————————————————————————
// Lists
// ——————————————————————————————————————————————————————————————————————————
85
86/// A list container — bullet or ordered.
87#[derive(Debug, Clone, PartialEq)]
88#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
89pub struct List {
90    /// Kind of list together with additional semantic data (start index or
91    /// bullet marker).
92    pub kind: ListKind,
93
94    /// List items in source order.
95    pub items: Vec<ListItem>,
96}
97
98/// Specifies *what kind* of list we have.
99#[derive(Debug, Clone, PartialEq)]
100#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
101pub enum ListKind {
102    /// Ordered list (`1.`, `42.` …) with an *optional* explicit start number.
103    Ordered(ListOrderedKindOptions),
104
105    /// Bullet list (`-`, `*`, or `+`) together with the concrete marker.
106    Bullet(ListBulletKind),
107}
108
/// Options that apply to an ordered list.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ListOrderedKindOptions {
    /// Start index (1, 2, …) of the ordered list.
    pub start: u64,
}
116
/// Concrete marker character used by a bullet list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ListBulletKind {
    /// `-` (U+002D)
    Dash,

    /// `*` (U+002A)
    Star,

    /// `+` (U+002B)
    Plus,
}
130
131/// Item within a list.
132#[derive(Debug, Clone, PartialEq)]
133#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
134pub struct ListItem {
135    /// Task‑list checkbox state (GFM task‑lists). `None` ⇒ not a task list.
136    pub task: Option<TaskState>,
137
138    /// Nested blocks inside the list item.
139    pub blocks: Vec<Block>,
140}
141
/// State of a GFM task-list checkbox.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum TaskState {
    /// Unchecked task-list item.
    Incomplete,

    /// Checked task-list item.
    Complete,
}
152
// ——————————————————————————————————————————————————————————————————————————
// Code blocks
// ——————————————————————————————————————————————————————————————————————————
156
157/// Fenced or indented code block.
158#[derive(Debug, Clone, PartialEq)]
159#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
160pub struct CodeBlock {
161    /// Distinguishes indented vs fenced code and stores the *info string*.
162    pub kind: CodeBlockKind,
163
164    /// Literal text inside the code block **without** final newline trimming.
165    pub literal: String,
166}
167
/// The concrete kind of a code block.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum CodeBlockKind {
    /// Indented block (≥ 4 spaces or 1 tab per line).
    Indented,

    /// Fenced block with an *optional* info string (language, etc.).
    Fenced { info: Option<String> },
}
178
// ——————————————————————————————————————————————————————————————————————————
// Link reference definitions
// ——————————————————————————————————————————————————————————————————————————
182
/// Link reference definition with a label, destination and optional title.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct LinkDefinition {
    /// Normalized label (acts as the *identifier*).
    pub label: String,

    /// Link URL (absolute or relative) or email address.
    pub destination: String,

    /// Optional title (for links and images).
    pub title: Option<String>,
}
196
// ——————————————————————————————————————————————————————————————————————————
// Tables
// ——————————————————————————————————————————————————————————————————————————
200
201/// A table is a collection of rows and columns with optional alignment.
202/// The first row is the header row.
203#[derive(Debug, Clone, PartialEq)]
204#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
205pub struct Table {
206    /// Each row is a vector of *cells*; header row is **row 0**.
207    pub rows: Vec<TableRow>,
208
209    /// Column alignment; `alignments.len() == column_count`.
210    pub alignments: Vec<Alignment>,
211}
212
213/// A table row is a vector of cells (columns).
214pub type TableRow = Vec<TableCell>;
215
216/// A table cell is a vector of inlines (text, links, etc.).
217pub type TableCell = Vec<Inline>;
218
/// Specifies the alignment of a table cell.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Alignment {
    /// No alignment specified
    None,

    /// Left aligned
    Left,

    /// Center aligned
    Center,

    /// Right aligned
    Right,
}
235
// ——————————————————————————————————————————————————————————————————————————
// Footnotes
// ——————————————————————————————————————————————————————————————————————————
239
240#[derive(Debug, Clone, PartialEq)]
241#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
242pub struct FootnoteDefinition {
243    /// Normalized label (without leading `^`).
244    pub label: String,
245
246    /// Footnote content (blocks).
247    pub blocks: Vec<Block>,
248}
249
// ——————————————————————————————————————————————————————————————————————————
// Inline‑level nodes
// ——————————————————————————————————————————————————————————————————————————
253
254#[derive(Debug, Clone, PartialEq)]
255#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
256pub enum Inline {
257    /// Plain text (decoded entity references, preserved backslash escapes).
258    Text(String),
259
260    /// Hard line break
261    LineBreak,
262
263    /// Inline code span
264    Code(String),
265
266    /// Raw HTML fragment
267    Html(String),
268
269    /// Link to a destination with optional title.
270    Link(Link),
271
272    /// Reference link
273    LinkReference(LinkReference),
274
275    /// Image with optional title.
276    Image(Link),
277
278    /// Emphasis (`*` / `_`)
279    Emphasis(Vec<Inline>),
280    /// Strong emphasis (`**` / `__`)
281    Strong(Vec<Inline>),
282    /// Strikethrough (`~~`)
283    Strikethrough(Vec<Inline>),
284
285    /// Autolink (`<https://>` or `<mailto:…>`)
286    Autolink(String),
287
288    /// Footnote reference (`[^label]`)
289    FootnoteReference(String),
290
291    /// Empty element. This is used to represent skipped elements in the AST.
292    Empty,
293}
294
295/// Re‑usable structure for links and images (destination + children).
296#[derive(Debug, Clone, PartialEq)]
297#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
298pub struct Link {
299    /// Destination URL (absolute or relative) or email address.
300    pub destination: String,
301
302    /// Optional title (for links and images).
303    pub title: Option<String>,
304
305    /// Inline content (text, code, etc.) inside the link or image.
306    pub children: Vec<Inline>,
307}
308
/// Reference link: identifies its target by label instead of carrying a
/// destination of its own.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct LinkReference {
    /// Normalized label (acts as the *identifier*).
    pub label: String,

    /// Link text
    pub text: String,
}