markdown_ppp/ast/mod.rs
1//! Fully‑typed Abstract Syntax Tree (AST) for CommonMark + GitHub Flavored Markdown (GFM)
2//! ------------------------------------------------------------------------------------
3//! This module models every construct described in the **CommonMark 1.0 specification**
4//! together with the widely‑used **GFM extensions**: tables, strikethrough, autolinks,
5//! task‑list items and footnotes.
6//!
7//! The design separates **block‑level** and **inline‑level** nodes because parsers and
8//! renderers typically operate on these tiers independently.
9//!
10//! ```text
//! Document ─┐
//!           └─ Block ─┐
//!                     ├─ Inline
//!                     └─ ...
15//! ```
16
17// ——————————————————————————————————————————————————————————————————————————
18// Document root
19// ——————————————————————————————————————————————————————————————————————————
20
21/// Root of a Markdown document
22#[derive(Debug, Clone, PartialEq)]
23#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
24pub struct Document {
25 /// Top‑level block sequence **in document order**.
26 pub blocks: Vec<Block>,
27}
28
29// ——————————————————————————————————————————————————————————————————————————
30// Block‑level nodes
31// ——————————————————————————————————————————————————————————————————————————
32
33/// Block‑level constructs in the order they appear in the CommonMark spec.
34#[derive(Debug, Clone, PartialEq)]
35#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
36pub enum Block {
37 /// Ordinary paragraph
38 Paragraph(Vec<Inline>),
39
40 /// ATX (`# Heading`) or Setext (`===`) heading
41 Heading(Heading),
42
43 /// Thematic break (horizontal rule)
44 ThematicBreak,
45
46 /// Block quote
47 BlockQuote(Vec<Block>),
48
49 /// List (bullet or ordered)
50 List(List),
51
52 /// Fenced or indented code block
53 CodeBlock(CodeBlock),
54
55 /// Raw HTML block
56 HtmlBlock(String),
57
58 /// Link reference definition. Preserved for round‑tripping.
59 Definition(LinkDefinition),
60
61 /// Tables
62 Table(Table),
63
64 /// Footnote definition
65 FootnoteDefinition(FootnoteDefinition),
66
67 /// Empty block. This is used to represent skipped blocks in the AST.
68 Empty,
69}
70
71/// Heading with level 1–6 and inline content.
72#[derive(Debug, Clone, PartialEq)]
73#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
74pub struct Heading {
75 /// Kind of heading (ATX or Setext) together with the level.
76 pub kind: HeadingKind,
77
78 /// Inlines that form the heading text (before trimming).
79 pub content: Vec<Inline>,
80}
81
82/// Heading with level 1–6 and inline content.
83#[derive(Debug, Clone, PartialEq)]
84#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
85pub enum HeadingKind {
86 /// ATX heading (`# Heading`)
87 Atx(u8),
88
89 /// Setext heading (`===` or `---`)
90 Setext(SetextHeading),
91}
92
/// Setext heading level, which is determined by the underline character.
///
/// Derives `Copy` and `Eq` like the other field-less enums of this module
/// (`ListBulletKind`, `TaskState`, `Alignment`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum SetextHeading {
    /// Setext heading with `=` underline
    Level1,

    /// Setext heading with `-` underline
    Level2,
}
103
104// ——————————————————————————————————————————————————————————————————————————
105// Lists
106// ——————————————————————————————————————————————————————————————————————————
107
108/// A list container — bullet or ordered.
109#[derive(Debug, Clone, PartialEq)]
110#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
111pub struct List {
112 /// Kind of list together with additional semantic data (start index or
113 /// bullet marker).
114 pub kind: ListKind,
115
116 /// List items in source order.
117 pub items: Vec<ListItem>,
118}
119
120/// Specifies *what kind* of list we have.
121#[derive(Debug, Clone, PartialEq)]
122#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
123pub enum ListKind {
124 /// Ordered list (`1.`, `42.` …) with an *optional* explicit start number.
125 Ordered(ListOrderedKindOptions),
126
127 /// Bullet list (`-`, `*`, or `+`) together with the concrete marker.
128 Bullet(ListBulletKind),
129}
130
/// Options attached to an ordered list.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ListOrderedKindOptions {
    /// Start index (1, 2, …) for ordered lists.
    pub start: u64,
}
138
/// Concrete bullet character used for a bullet list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ListBulletKind {
    /// `-` U+002D
    Dash,

    /// `*` U+002A
    Star,

    /// `+` U+002B
    Plus,
}
152
153/// Item within a list.
154#[derive(Debug, Clone, PartialEq)]
155#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
156pub struct ListItem {
157 /// Task‑list checkbox state (GFM task‑lists). `None` ⇒ not a task list.
158 pub task: Option<TaskState>,
159
160 /// Nested blocks inside the list item.
161 pub blocks: Vec<Block>,
162}
163
/// State of a task-list checkbox.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum TaskState {
    /// Unchecked (GFM task-list item)
    Incomplete,

    /// Checked (GFM task-list item)
    Complete,
}
174
175// ——————————————————————————————————————————————————————————————————————————
176// Code blocks
177// ——————————————————————————————————————————————————————————————————————————
178
179/// Fenced or indented code block.
180#[derive(Debug, Clone, PartialEq)]
181#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
182pub struct CodeBlock {
183 /// Distinguishes indented vs fenced code and stores the *info string*.
184 pub kind: CodeBlockKind,
185
186 /// Literal text inside the code block **without** final newline trimming.
187 pub literal: String,
188}
189
/// The concrete kind of a code block.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum CodeBlockKind {
    /// Indented block (≥ 4 spaces or 1 tab per line).
    Indented,

    /// Fenced block with *optional* info string (language, etc.).
    Fenced {
        /// Info string of the fence; `None` when the fence has none.
        info: Option<String>,
    },
}
200
201// ——————————————————————————————————————————————————————————————————————————
202// Link reference definitions
203// ——————————————————————————————————————————————————————————————————————————
204
205/// Link reference definition (GFM) with a label, destination and optional title.
206#[derive(Debug, Clone, PartialEq)]
207#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
208pub struct LinkDefinition {
209 /// Link label (acts as the *identifier*).
210 pub label: Vec<Inline>,
211
212 /// Link URL (absolute or relative) or email address.
213 pub destination: String,
214
215 /// Optional title (for links and images).
216 pub title: Option<String>,
217}
218
219// ——————————————————————————————————————————————————————————————————————————
220// Tables
221// ——————————————————————————————————————————————————————————————————————————
222
223/// A table is a collection of rows and columns with optional alignment.
224/// The first row is the header row.
225#[derive(Debug, Clone, PartialEq)]
226#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
227pub struct Table {
228 /// Each row is a vector of *cells*; header row is **row 0**.
229 pub rows: Vec<TableRow>,
230
231 /// Column alignment; `alignments.len() == column_count`.
232 pub alignments: Vec<Alignment>,
233}
234
235/// A table row is a vector of cells (columns).
236pub type TableRow = Vec<TableCell>;
237
238/// A table cell is a vector of inlines (text, links, etc.).
239pub type TableCell = Vec<Inline>;
240
/// Specifies the alignment of a table cell.
///
/// The [`Default`] value is [`Alignment::Left`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Alignment {
    /// No alignment specified
    None,

    /// Left aligned
    #[default]
    Left,

    /// Center aligned
    Center,

    /// Right aligned
    Right,
}
258
259// ——————————————————————————————————————————————————————————————————————————
260// Footnotes
261// ——————————————————————————————————————————————————————————————————————————
262
263#[derive(Debug, Clone, PartialEq)]
264#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
265pub struct FootnoteDefinition {
266 /// Normalized label (without leading `^`).
267 pub label: String,
268
269 /// Footnote content (blocks).
270 pub blocks: Vec<Block>,
271}
272
273// ——————————————————————————————————————————————————————————————————————————
274// Inline‑level nodes
275// ——————————————————————————————————————————————————————————————————————————
276
277#[derive(Debug, Clone, PartialEq, Hash, Eq)]
278#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
279pub enum Inline {
280 /// Plain text (decoded entity references, preserved backslash escapes).
281 Text(String),
282
283 /// Hard line break
284 LineBreak,
285
286 /// Inline code span
287 Code(String),
288
289 /// Raw HTML fragment
290 Html(String),
291
292 /// Link to a destination with optional title.
293 Link(Link),
294
295 /// Reference link
296 LinkReference(LinkReference),
297
298 /// Image with optional title.
299 Image(Image),
300
301 /// Emphasis (`*` / `_`)
302 Emphasis(Vec<Inline>),
303 /// Strong emphasis (`**` / `__`)
304 Strong(Vec<Inline>),
305 /// Strikethrough (`~~`)
306 Strikethrough(Vec<Inline>),
307
308 /// Autolink (`<https://>` or `<mailto:…>`)
309 Autolink(String),
310
311 /// Footnote reference (`[^label]`)
312 FootnoteReference(String),
313
314 /// Empty element. This is used to represent skipped elements in the AST.
315 Empty,
316}
317
318/// Re‑usable structure for links and images (destination + children).
319#[derive(Debug, Clone, PartialEq, Hash, Eq)]
320#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
321pub struct Link {
322 /// Destination URL (absolute or relative) or email address.
323 pub destination: String,
324
325 /// Optional title (for links and images).
326 pub title: Option<String>,
327
328 /// Inline content (text, code, etc.) inside the link or image.
329 pub children: Vec<Inline>,
330}
331
/// Payload of an image: destination, optional title and alternative text.
///
/// Unlike a link, an image carries no inline children; its description is
/// stored as a plain string in `alt`.
#[derive(Debug, Clone, PartialEq, Hash, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Image {
    /// Image URL (absolute or relative).
    pub destination: String,

    /// Optional title.
    pub title: Option<String>,

    /// Alternative text.
    pub alt: String,
}
345
346#[derive(Debug, Clone, PartialEq, Hash, Eq)]
347#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
348pub struct LinkReference {
349 /// Link label (acts as the *identifier*).
350 pub label: Vec<Inline>,
351
352 /// Link text
353 pub text: Vec<Inline>,
354}