markdown_ppp/ast/mod.rs
1//! Fully‑typed Abstract Syntax Tree (AST) for CommonMark + GitHub Flavored Markdown (GFM)
2//! ------------------------------------------------------------------------------------
3//! This module models every construct described in the **CommonMark 1.0 specification**
4//! together with the widely‑used **GFM extensions**: tables, strikethrough, autolinks,
5//! task‑list items and footnotes.
6//!
7//! The design separates **block‑level** and **inline‑level** nodes because parsers and
8//! renderers typically operate on these tiers independently.
9//!
//! ```text
//! Document ─┐
//!           └─ Block ─┐
//!                     ├─ Inline
//!                     └─ ...
//! ```
16//!
17//! # User Data Support
18//!
19//! This crate supports attaching user-defined data to AST nodes through the generic
20//! AST module. See [`crate::ast::generic`] for more details.
21
22/// Conversion utilities for AST nodes with user data
23pub mod convert;
24
25/// Generic AST types that support user-defined data
26pub mod generic;
27
28/// Visitor-based MapData implementation to avoid recursion limits
29pub mod map_data_visitor;
30
31mod github_alerts;
32pub use github_alerts::{GitHubAlert, GitHubAlertType};
33
34// ——————————————————————————————————————————————————————————————————————————
35// Document root
36// ——————————————————————————————————————————————————————————————————————————
37
38/// Root of a Markdown document
39#[derive(Debug, Clone, PartialEq)]
40#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
41pub struct Document {
42 /// Top‑level block sequence **in document order**.
43 pub blocks: Vec<Block>,
44}
45
46// ——————————————————————————————————————————————————————————————————————————
47// Block‑level nodes
48// ——————————————————————————————————————————————————————————————————————————
49
50/// Block‑level constructs in the order they appear in the CommonMark spec.
51#[derive(Debug, Clone, PartialEq)]
52#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
53pub enum Block {
54 /// Ordinary paragraph
55 Paragraph(Vec<Inline>),
56
57 /// ATX (`# Heading`) or Setext (`===`) heading
58 Heading(Heading),
59
60 /// Thematic break (horizontal rule)
61 ThematicBreak,
62
63 /// Block quote
64 BlockQuote(Vec<Block>),
65
66 /// List (bullet or ordered)
67 List(List),
68
69 /// Fenced or indented code block
70 CodeBlock(CodeBlock),
71
72 /// Raw HTML block
73 HtmlBlock(String),
74
75 /// Link reference definition. Preserved for round‑tripping.
76 Definition(LinkDefinition),
77
78 /// Tables
79 Table(Table),
80
81 /// Footnote definition
82 FootnoteDefinition(FootnoteDefinition),
83
84 /// GitHub alert block (NOTE, TIP, IMPORTANT, WARNING, CAUTION)
85 GitHubAlert(GitHubAlert),
86
87 /// Empty block. This is used to represent skipped blocks in the AST.
88 Empty,
89}
90
91/// Heading with level 1–6 and inline content.
92#[derive(Debug, Clone, PartialEq)]
93#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
94pub struct Heading {
95 /// Kind of heading (ATX or Setext) together with the level.
96 pub kind: HeadingKind,
97
98 /// Inlines that form the heading text (before trimming).
99 pub content: Vec<Inline>,
100}
101
102/// Heading with level 1–6 and inline content.
103#[derive(Debug, Clone, PartialEq)]
104#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
105pub enum HeadingKind {
106 /// ATX heading (`# Heading`)
107 Atx(u8),
108
109 /// Setext heading (`===` or `---`)
110 Setext(SetextHeading),
111}
112
/// Level of a Setext heading, identified by its underline character.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum SetextHeading {
    /// Level 1: the heading is underlined with `=`.
    Level1,

    /// Level 2: the heading is underlined with `-`.
    Level2,
}
123
124// ——————————————————————————————————————————————————————————————————————————
125// Lists
126// ——————————————————————————————————————————————————————————————————————————
127
128/// A list container — bullet or ordered.
129#[derive(Debug, Clone, PartialEq)]
130#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
131pub struct List {
132 /// Kind of list together with additional semantic data (start index or
133 /// bullet marker).
134 pub kind: ListKind,
135
136 /// List items in source order.
137 pub items: Vec<ListItem>,
138}
139
140/// Specifies *what kind* of list we have.
141#[derive(Debug, Clone, PartialEq)]
142#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
143pub enum ListKind {
144 /// Ordered list (`1.`, `42.` …) with an *optional* explicit start number.
145 Ordered(ListOrderedKindOptions),
146
147 /// Bullet list (`-`, `*`, or `+`) together with the concrete marker.
148 Bullet(ListBulletKind),
149}
150
/// Options attached to an ordered list.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ListOrderedKindOptions {
    /// Start index (1, 2, …) of the ordered list.
    pub start: u64,
}
158
/// The marker character that introduces the items of a bullet list.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ListBulletKind {
    /// Hyphen-minus marker: `-` (U+002D).
    Dash,

    /// Asterisk marker: `*` (U+002A).
    Star,

    /// Plus-sign marker: `+` (U+002B).
    Plus,
}
172
173/// Item within a list.
174#[derive(Debug, Clone, PartialEq)]
175#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
176pub struct ListItem {
177 /// Task‑list checkbox state (GFM task‑lists). `None` ⇒ not a task list.
178 pub task: Option<TaskState>,
179
180 /// Nested blocks inside the list item.
181 pub blocks: Vec<Block>,
182}
183
/// Checkbox state of a GFM task-list item.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum TaskState {
    /// The checkbox is unchecked.
    Incomplete,

    /// The checkbox is checked.
    Complete,
}
194
195// ——————————————————————————————————————————————————————————————————————————
196// Code blocks
197// ——————————————————————————————————————————————————————————————————————————
198
199/// Fenced or indented code block.
200#[derive(Debug, Clone, PartialEq)]
201#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
202pub struct CodeBlock {
203 /// Distinguishes indented vs fenced code and stores the *info string*.
204 pub kind: CodeBlockKind,
205
206 /// Literal text inside the code block **without** final newline trimming.
207 pub literal: String,
208}
209
/// Which syntactic form a code block was written in.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum CodeBlockKind {
    /// An indented block (each line has ≥ 4 spaces or 1 tab of indentation).
    Indented,

    /// A fenced block, with an *optional* info string (language, etc.).
    Fenced {
        /// The info string, if any: language identifier and other metadata.
        info: Option<String>,
    },
}
223
224// ——————————————————————————————————————————————————————————————————————————
225// Link reference definitions
226// ——————————————————————————————————————————————————————————————————————————
227
228/// Link reference definition (GFM) with a label, destination and optional title.
229#[derive(Debug, Clone, PartialEq)]
230#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
231pub struct LinkDefinition {
232 /// Link label (acts as the *identifier*).
233 pub label: Vec<Inline>,
234
235 /// Link URL (absolute or relative) or email address.
236 pub destination: String,
237
238 /// Optional title (for links and images).
239 pub title: Option<String>,
240}
241
242// ——————————————————————————————————————————————————————————————————————————
243// Tables
244// ——————————————————————————————————————————————————————————————————————————
245
246/// A table is a collection of rows and columns with optional alignment.
247/// The first row is the header row.
248#[derive(Debug, Clone, PartialEq)]
249#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
250pub struct Table {
251 /// Each row is a vector of *cells*; header row is **row 0**.
252 pub rows: Vec<TableRow>,
253
254 /// Column alignment; `alignments.len() == column_count`.
255 pub alignments: Vec<Alignment>,
256}
257
258/// A table row is a vector of cells (columns).
259pub type TableRow = Vec<TableCell>;
260
261/// A table cell is a vector of inlines (text, links, etc.).
262pub type TableCell = Vec<Inline>;
263
/// Horizontal alignment of a table column (and therefore of its cells).
///
/// Note that the derived `Default` is [`Alignment::Left`], not
/// [`Alignment::None`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Alignment {
    /// No alignment specified
    None,

    /// Left aligned
    #[default]
    Left,

    /// Center aligned
    Center,

    /// Right aligned
    Right,
}
281
282// ——————————————————————————————————————————————————————————————————————————
283// Footnotes
284// ——————————————————————————————————————————————————————————————————————————
285
286#[derive(Debug, Clone, PartialEq)]
287/// Footnote definition block (e.g., `[^label]: content`).
288#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
289pub struct FootnoteDefinition {
290 /// Normalized label (without leading `^`).
291 pub label: String,
292
293 /// Footnote content (blocks).
294 pub blocks: Vec<Block>,
295}
296
297// ——————————————————————————————————————————————————————————————————————————
298// Inline‑level nodes
299// ——————————————————————————————————————————————————————————————————————————
300
301/// Inline-level elements within paragraphs, headings, and other blocks.
302#[derive(Debug, Clone, PartialEq, Hash, Eq)]
303#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
304pub enum Inline {
305 /// Plain text (decoded entity references, preserved backslash escapes).
306 Text(String),
307
308 /// Hard line break
309 LineBreak,
310
311 /// Inline code span
312 Code(String),
313
314 /// Raw HTML fragment
315 Html(String),
316
317 /// Link to a destination with optional title.
318 Link(Link),
319
320 /// Reference link
321 LinkReference(LinkReference),
322
323 /// Image with optional title.
324 Image(Image),
325
326 /// Emphasis (`*` / `_`)
327 Emphasis(Vec<Inline>),
328 /// Strong emphasis (`**` / `__`)
329 Strong(Vec<Inline>),
330 /// Strikethrough (`~~`)
331 Strikethrough(Vec<Inline>),
332
333 /// Autolink (`<https://>` or `<mailto:…>`)
334 Autolink(String),
335
336 /// Footnote reference (`[^label]`)
337 FootnoteReference(String),
338
339 /// Empty element. This is used to represent skipped elements in the AST.
340 Empty,
341}
342
343/// Re‑usable structure for links and images (destination + children).
344#[derive(Debug, Clone, PartialEq, Hash, Eq)]
345#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
346pub struct Link {
347 /// Destination URL (absolute or relative) or email address.
348 pub destination: String,
349
350 /// Optional title (for links and images).
351 pub title: Option<String>,
352
353 /// Inline content (text, code, etc.) inside the link or image.
354 pub children: Vec<Inline>,
355}
356
/// An inline image: destination, optional title and alternative text.
///
/// Unlike [`Link`], an image has no child inlines; its textual content is
/// the plain-string [`Image::alt`].
#[derive(Debug, Clone, PartialEq, Hash, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Image {
    /// Image URL (absolute or relative).
    pub destination: String,

    /// Optional title.
    pub title: Option<String>,

    /// Alternative text.
    pub alt: String,
}
370
371/// Reference-style link (e.g., `[text][label]` or `[label][]`).
372#[derive(Debug, Clone, PartialEq, Hash, Eq)]
373#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
374pub struct LinkReference {
375 /// Link label (acts as the *identifier*).
376 pub label: Vec<Inline>,
377
378 /// Link text
379 pub text: Vec<Inline>,
380}
381
382// ——————————————————————————————————————————————————————————————————————————
383// Backward compatibility type aliases
384// ——————————————————————————————————————————————————————————————————————————
385
386/// Simple document without user data (backward compatible)
387pub type SimpleDocument = generic::Document<()>;
388
389/// Simple block without user data (backward compatible)
390pub type SimpleBlock = generic::Block<()>;
391
392/// Simple inline without user data (backward compatible)
393pub type SimpleInline = generic::Inline<()>;
394
395/// Simple heading without user data (backward compatible)
396pub type SimpleHeading = generic::Heading<()>;
397
398/// Simple list without user data (backward compatible)
399pub type SimpleList = generic::List<()>;
400
401/// Simple list item without user data (backward compatible)
402pub type SimpleListItem = generic::ListItem<()>;
403
404/// Simple code block without user data (backward compatible)
405pub type SimpleCodeBlock = generic::CodeBlock<()>;
406
407/// Simple link definition without user data (backward compatible)
408pub type SimpleLinkDefinition = generic::LinkDefinition<()>;
409
410/// Simple table without user data (backward compatible)
411pub type SimpleTable = generic::Table<()>;
412
413/// Simple table row without user data (backward compatible)
414pub type SimpleTableRow = generic::TableRow<()>;
415
416/// Simple table cell without user data (backward compatible)
417pub type SimpleTableCell = generic::TableCell<()>;
418
419/// Simple footnote definition without user data (backward compatible)
420pub type SimpleFootnoteDefinition = generic::FootnoteDefinition<()>;
421
422/// Simple GitHub alert without user data (backward compatible)
423pub type SimpleGitHubAlert = generic::GitHubAlertNode<()>;
424
425/// Simple link without user data (backward compatible)
426pub type SimpleLink = generic::Link<()>;
427
428/// Simple image without user data (backward compatible)
429pub type SimpleImage = generic::Image<()>;
430
431/// Simple link reference without user data (backward compatible)
432pub type SimpleLinkReference = generic::LinkReference<()>;
433
434// ——————————————————————————————————————————————————————————————————————————
435// Tests
436// ——————————————————————————————————————————————————————————————————————————