Skip to main content

markdown_ppp/ast/
mod.rs

1//! Fully‑typed Abstract Syntax Tree (AST) for CommonMark + GitHub Flavored Markdown (GFM)
2//! ------------------------------------------------------------------------------------
3//! This module models every construct described in the **CommonMark 1.0 specification**
4//! together with the widely‑used **GFM extensions**: tables, strikethrough, autolinks,
5//! task‑list items and footnotes.
6//!
7//! The design separates **block‑level** and **inline‑level** nodes because parsers and
8//! renderers typically operate on these tiers independently.
9//!
10//! ```text
11//! Document ─┐
12//!           └─ Block ─┐
13//!                     ├─ Inline
14//!                     └─ ...
15//! ```
16//!
17//! # User Data Support
18//!
19//! This crate supports attaching user-defined data to AST nodes through the generic
20//! AST module. See [`crate::ast::generic`] for more details.
21
22/// Conversion utilities for AST nodes with user data
23pub mod convert;
24
25/// Generic AST types that support user-defined data
26pub mod generic;
27
28/// Pre-processing indices for footnotes and link definitions.
29pub(crate) mod index;
30
31/// Visitor-based MapData implementation to avoid recursion limits
32pub mod map_data_visitor;
33
34mod github_alerts;
35pub use github_alerts::{GitHubAlert, GitHubAlertType};
36
37// ——————————————————————————————————————————————————————————————————————————
38// Document root
39// ——————————————————————————————————————————————————————————————————————————
40
41/// Root of a Markdown document
42#[derive(Debug, Clone, PartialEq)]
43#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
44pub struct Document {
45    /// Top‑level block sequence **in document order**.
46    pub blocks: Vec<Block>,
47}
48
49// ——————————————————————————————————————————————————————————————————————————
50// Block‑level nodes
51// ——————————————————————————————————————————————————————————————————————————
52
53/// Block‑level constructs in the order they appear in the CommonMark spec.
54#[derive(Debug, Clone, PartialEq)]
55#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
56pub enum Block {
57    /// Ordinary paragraph
58    Paragraph(Vec<Inline>),
59
60    /// ATX (`# Heading`) or Setext (`===`) heading
61    Heading(Heading),
62
63    /// Thematic break (horizontal rule)
64    ThematicBreak,
65
66    /// Block quote
67    BlockQuote(Vec<Block>),
68
69    /// List (bullet or ordered)
70    List(List),
71
72    /// Fenced or indented code block
73    CodeBlock(CodeBlock),
74
75    /// Raw HTML block
76    HtmlBlock(String),
77
78    /// Link reference definition.  Preserved for round‑tripping.
79    Definition(LinkDefinition),
80
81    /// Tables
82    Table(Table),
83
84    /// Footnote definition
85    FootnoteDefinition(FootnoteDefinition),
86
87    /// GitHub alert block (NOTE, TIP, IMPORTANT, WARNING, CAUTION)
88    GitHubAlert(GitHubAlert),
89
90    /// Empty block. This is used to represent skipped blocks in the AST.
91    Empty,
92}
93
94/// Heading with level 1–6 and inline content.
95#[derive(Debug, Clone, PartialEq)]
96#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
97pub struct Heading {
98    /// Kind of heading (ATX or Setext) together with the level.
99    pub kind: HeadingKind,
100
101    /// Inlines that form the heading text (before trimming).
102    pub content: Vec<Inline>,
103}
104
105/// Heading with level 1–6 and inline content.
106#[derive(Debug, Clone, PartialEq)]
107#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
108pub enum HeadingKind {
109    /// ATX heading (`# Heading`)
110    Atx(u8),
111
112    /// Setext heading (`===` or `---`)
113    Setext(SetextHeading),
114}
115
/// Level of a Setext heading, determined by the character used for its
/// underline.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum SetextHeading {
    /// Underlined with `=`.
    Level1,

    /// Underlined with `-`.
    Level2,
}
126
127// ——————————————————————————————————————————————————————————————————————————
128// Lists
129// ——————————————————————————————————————————————————————————————————————————
130
131/// A list container — bullet or ordered.
132#[derive(Debug, Clone, PartialEq)]
133#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
134pub struct List {
135    /// Kind of list together with additional semantic data (start index or
136    /// bullet marker).
137    pub kind: ListKind,
138
139    /// List items in source order.
140    pub items: Vec<ListItem>,
141}
142
143/// Specifies *what kind* of list we have.
144#[derive(Debug, Clone, PartialEq)]
145#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
146pub enum ListKind {
147    /// Ordered list (`1.`, `42.` …) with an *optional* explicit start number.
148    Ordered(ListOrderedKindOptions),
149
150    /// Bullet list (`-`, `*`, or `+`) together with the concrete marker.
151    Bullet(ListBulletKind),
152}
153
/// Options attached to an ordered list (the payload of `ListKind::Ordered`).
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ListOrderedKindOptions {
    /// Number of the first list item (e.g. `1` for `1.`, `42` for `42.`).
    pub start: u64,
}
161
/// The marker character a bullet list was written with.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ListBulletKind {
    /// Hyphen-minus, `-` (U+002D).
    Dash,

    /// Asterisk, `*` (U+002A).
    Star,

    /// Plus sign, `+` (U+002B).
    Plus,
}
175
176/// Item within a list.
177#[derive(Debug, Clone, PartialEq)]
178#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
179pub struct ListItem {
180    /// Task‑list checkbox state (GFM task‑lists). `None` ⇒ not a task list.
181    pub task: Option<TaskState>,
182
183    /// Nested blocks inside the list item.
184    pub blocks: Vec<Block>,
185}
186
/// Checkbox state of a GFM task-list item.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum TaskState {
    /// The checkbox is unchecked.
    Incomplete,

    /// The checkbox is checked.
    Complete,
}
197
198// ——————————————————————————————————————————————————————————————————————————
199// Code blocks
200// ——————————————————————————————————————————————————————————————————————————
201
202/// Fenced or indented code block.
203#[derive(Debug, Clone, PartialEq)]
204#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
205pub struct CodeBlock {
206    /// Distinguishes indented vs fenced code and stores the *info string*.
207    pub kind: CodeBlockKind,
208
209    /// Literal text inside the code block **without** final newline trimming.
210    pub literal: String,
211}
212
/// How a code block was written in the source.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum CodeBlockKind {
    /// Indented code block (every line indented by ≥ 4 spaces or 1 tab).
    Indented,

    /// Fenced code block; the info string (language, etc.) is optional.
    Fenced {
        /// The info string with the language identifier and any further
        /// metadata, if one was given.
        info: Option<String>,
    },
}
226
227// ——————————————————————————————————————————————————————————————————————————
228// Link reference definitions
229// ——————————————————————————————————————————————————————————————————————————
230
231/// Link reference definition (GFM) with a label, destination and optional title.
232#[derive(Debug, Clone, PartialEq)]
233#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
234pub struct LinkDefinition {
235    /// Link label (acts as the *identifier*).
236    pub label: Vec<Inline>,
237
238    /// Link URL (absolute or relative) or email address.
239    pub destination: String,
240
241    /// Optional title (for links and images).
242    pub title: Option<String>,
243}
244
245// ——————————————————————————————————————————————————————————————————————————
246// Tables
247// ——————————————————————————————————————————————————————————————————————————
248
249/// A table is a collection of rows and columns with optional alignment.
250/// The first row is the header row.
251#[derive(Debug, Clone, PartialEq)]
252#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
253pub struct Table {
254    /// Each row is a vector of *cells*; header row is **row 0**.
255    pub rows: Vec<TableRow>,
256
257    /// Column alignment; `alignments.len() == column_count`.
258    pub alignments: Vec<Alignment>,
259}
260
261/// A table row is a vector of cells (columns).
262pub type TableRow = Vec<TableCell>;
263
264/// A table cell is a vector of inlines (text, links, etc.).
265pub type TableCell = Vec<Inline>;
266
/// Specifies the alignment of a table column.
///
/// The `Default` value is [`Alignment::Left`], not [`Alignment::None`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Alignment {
    /// No alignment specified
    None,

    /// Left aligned (this is the `Default` variant)
    #[default]
    Left,

    /// Center aligned
    Center,

    /// Right aligned
    Right,
}
284
285// ——————————————————————————————————————————————————————————————————————————
286// Footnotes
287// ——————————————————————————————————————————————————————————————————————————
288
289#[derive(Debug, Clone, PartialEq)]
290/// Footnote definition block (e.g., `[^label]: content`).
291#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
292pub struct FootnoteDefinition {
293    /// Normalized label (without leading `^`).
294    pub label: String,
295
296    /// Footnote content (blocks).
297    pub blocks: Vec<Block>,
298}
299
300// ——————————————————————————————————————————————————————————————————————————
301// Inline‑level nodes
302// ——————————————————————————————————————————————————————————————————————————
303
304/// Inline-level elements within paragraphs, headings, and other blocks.
305#[derive(Debug, Clone, PartialEq, Hash, Eq)]
306#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
307pub enum Inline {
308    /// Plain text (decoded entity references, preserved backslash escapes).
309    Text(String),
310
311    /// Hard line break
312    LineBreak,
313
314    /// Inline code span
315    Code(String),
316
317    /// Raw HTML fragment
318    Html(String),
319
320    /// Link to a destination with optional title.
321    Link(Link),
322
323    /// Reference link
324    LinkReference(LinkReference),
325
326    /// Image with optional title.
327    Image(Image),
328
329    /// Emphasis (`*` / `_`)
330    Emphasis(Vec<Inline>),
331    /// Strong emphasis (`**` / `__`)
332    Strong(Vec<Inline>),
333    /// Strikethrough (`~~`)
334    Strikethrough(Vec<Inline>),
335
336    /// Autolink (`<https://>` or `<mailto:…>`)
337    Autolink(String),
338
339    /// Footnote reference (`[^label]`)
340    FootnoteReference(String),
341
342    /// Empty element. This is used to represent skipped elements in the AST.
343    Empty,
344}
345
346/// Re‑usable structure for links and images (destination + children).
347#[derive(Debug, Clone, PartialEq, Hash, Eq)]
348#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
349pub struct Link {
350    /// Destination URL (absolute or relative) or email address.
351    pub destination: String,
352
353    /// Optional title (for links and images).
354    pub title: Option<String>,
355
356    /// Inline content (text, code, etc.) inside the link or image.
357    pub children: Vec<Inline>,
358}
359
/// Image: destination, optional title and alternative text.
///
/// Unlike [`Link`], an image has no inline children; its description is the
/// plain-string `alt` field.
#[derive(Debug, Clone, PartialEq, Hash, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Image {
    /// Image URL (absolute or relative).
    pub destination: String,

    /// Optional title.
    pub title: Option<String>,

    /// Alternative text.
    pub alt: String,
}
373
374/// Reference-style link (e.g., `[text][label]` or `[label][]`).
375#[derive(Debug, Clone, PartialEq, Hash, Eq)]
376#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
377pub struct LinkReference {
378    /// Link label (acts as the *identifier*).
379    pub label: Vec<Inline>,
380
381    /// Link text
382    pub text: Vec<Inline>,
383}
384
385// ——————————————————————————————————————————————————————————————————————————
386// Backward compatibility type aliases
387// ——————————————————————————————————————————————————————————————————————————
388
389/// Simple document without user data (backward compatible)
390pub type SimpleDocument = generic::Document<()>;
391
392/// Simple block without user data (backward compatible)
393pub type SimpleBlock = generic::Block<()>;
394
395/// Simple inline without user data (backward compatible)
396pub type SimpleInline = generic::Inline<()>;
397
398/// Simple heading without user data (backward compatible)
399pub type SimpleHeading = generic::Heading<()>;
400
401/// Simple list without user data (backward compatible)
402pub type SimpleList = generic::List<()>;
403
404/// Simple list item without user data (backward compatible)
405pub type SimpleListItem = generic::ListItem<()>;
406
407/// Simple code block without user data (backward compatible)
408pub type SimpleCodeBlock = generic::CodeBlock<()>;
409
410/// Simple link definition without user data (backward compatible)
411pub type SimpleLinkDefinition = generic::LinkDefinition<()>;
412
413/// Simple table without user data (backward compatible)
414pub type SimpleTable = generic::Table<()>;
415
416/// Simple table row without user data (backward compatible)
417pub type SimpleTableRow = generic::TableRow<()>;
418
419/// Simple table cell without user data (backward compatible)
420pub type SimpleTableCell = generic::TableCell<()>;
421
422/// Simple footnote definition without user data (backward compatible)
423pub type SimpleFootnoteDefinition = generic::FootnoteDefinition<()>;
424
425/// Simple GitHub alert without user data (backward compatible)
426pub type SimpleGitHubAlert = generic::GitHubAlertNode<()>;
427
428/// Simple link without user data (backward compatible)
429pub type SimpleLink = generic::Link<()>;
430
431/// Simple image without user data (backward compatible)
432pub type SimpleImage = generic::Image<()>;
433
434/// Simple link reference without user data (backward compatible)
435pub type SimpleLinkReference = generic::LinkReference<()>;
436
437// ——————————————————————————————————————————————————————————————————————————
438// Tests
439// ——————————————————————————————————————————————————————————————————————————