markdown_ppp/ast/mod.rs
1//! Fully‑typed Abstract Syntax Tree (AST) for CommonMark + GitHub Flavored Markdown (GFM)
2//! ------------------------------------------------------------------------------------
3//! This module models every construct described in the **CommonMark 1.0 specification**
4//! together with the widely‑used **GFM extensions**: tables, strikethrough, autolinks,
5//! task‑list items and footnotes.
6//!
7//! The design separates **block‑level** and **inline‑level** nodes because parsers and
8//! renderers typically operate on these tiers independently.
9//!
10//! ```text
11//! Document ─┐
12//! └─ Block ─┐
13//! ├─ Inline
14//! └─ ...
15//! ```
16
17mod github_alerts;
18pub use github_alerts::{GitHubAlert, GitHubAlertType};
19
20// ——————————————————————————————————————————————————————————————————————————
21// Document root
22// ——————————————————————————————————————————————————————————————————————————
23
/// Root of a Markdown document.
///
/// The whole tree hangs off [`Document::blocks`]; the struct holds nothing
/// else. Serde support is compiled in only when the `ast-serde` feature is
/// enabled.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Document {
    /// Top-level block sequence **in document order**.
    pub blocks: Vec<Block>,
}
31
32// ——————————————————————————————————————————————————————————————————————————
33// Block‑level nodes
34// ——————————————————————————————————————————————————————————————————————————
35
/// Block-level constructs.
///
/// The CommonMark blocks are listed first, followed by the extension blocks
/// (tables, footnote definitions, GitHub alerts) and the [`Block::Empty`]
/// placeholder.
///
/// The type is recursive: [`Block::BlockQuote`] holds nested blocks directly,
/// and the payloads of [`Block::List`] and [`Block::FootnoteDefinition`] hold
/// nested blocks through their `blocks` fields.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Block {
    /// Ordinary paragraph made of inline content.
    Paragraph(Vec<Inline>),

    /// ATX (`# Heading`) or Setext (`===`) heading.
    Heading(Heading),

    /// Thematic break (horizontal rule). Carries no data.
    ThematicBreak,

    /// Block quote containing nested blocks.
    BlockQuote(Vec<Block>),

    /// List (bullet or ordered).
    List(List),

    /// Fenced or indented code block.
    CodeBlock(CodeBlock),

    /// Raw HTML block, stored as a plain string.
    HtmlBlock(String),

    /// Link reference definition. Preserved for round-tripping.
    Definition(LinkDefinition),

    /// Table (header row plus per-column alignment).
    Table(Table),

    /// Footnote definition.
    FootnoteDefinition(FootnoteDefinition),

    /// GitHub alert block (NOTE, TIP, IMPORTANT, WARNING, CAUTION).
    GitHubAlert(GitHubAlert),

    /// Empty block. This is used to represent skipped blocks in the AST.
    Empty,
}
76
/// Heading: its syntactic kind (which also encodes the level) plus the inline
/// content of the heading text.
///
/// The struct has no separate `level` field; the level is read from
/// [`Heading::kind`].
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Heading {
    /// Kind of heading (ATX or Setext) together with the level.
    pub kind: HeadingKind,

    /// Inlines that form the heading text (before trimming).
    pub content: Vec<Inline>,
}
87
/// Syntactic form of a heading (ATX or Setext) together with its level.
///
/// This is the type of [`Heading::kind`]; it does not hold the heading text.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum HeadingKind {
    /// ATX heading (`# Heading`). The payload is the heading level; the `u8`
    /// type itself does not restrict it to the 1–6 range.
    Atx(u8),

    /// Setext heading (`===` or `---`); the level is given by the
    /// [`SetextHeading`] variant.
    Setext(SetextHeading),
}
98
/// Level of a Setext heading, identified by its underline character.
///
/// Only levels 1 and 2 are representable. The enum is field-less, so it
/// derives `Copy` and `Eq` in addition to `Clone` and `PartialEq`, which lets
/// callers pass it by value and use it where full equality is required.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum SetextHeading {
    /// Setext heading with `=` underline (level 1).
    Level1,

    /// Setext heading with `-` underline (level 2).
    Level2,
}
109
110// ——————————————————————————————————————————————————————————————————————————
111// Lists
112// ——————————————————————————————————————————————————————————————————————————
113
/// A list container — bullet or ordered.
///
/// The list-wide properties live in [`List::kind`]; the per-item content
/// lives in [`List::items`].
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct List {
    /// Kind of list together with additional semantic data (start index or
    /// bullet marker).
    pub kind: ListKind,

    /// List items in source order.
    pub items: Vec<ListItem>,
}
125
/// Specifies *what kind* of list we have, with the data specific to that kind.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ListKind {
    /// Ordered list (`1.`, `42.` …). The start number is stored in
    /// [`ListOrderedKindOptions::start`] and is always present (it is a plain
    /// `u64`, not an `Option`).
    Ordered(ListOrderedKindOptions),

    /// Bullet list (`-`, `*`, or `+`) together with the concrete marker.
    Bullet(ListBulletKind),
}
136
/// Options attached to an ordered list (the payload of [`ListKind::Ordered`]).
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ListOrderedKindOptions {
    /// Start index (1, 2, …) for ordered lists.
    pub start: u64,
}
144
/// Concrete bullet character used for a bullet list (the payload of
/// [`ListKind::Bullet`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ListBulletKind {
    /// `-` U+002D
    Dash,

    /// `*` U+002A
    Star,

    /// `+` U+002B
    Plus,
}
158
/// Item within a list: an optional task checkbox plus the item's block
/// content.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ListItem {
    /// Task-list checkbox state (GFM task-lists). `None` ⇒ the item has no
    /// checkbox, i.e. it is not a task-list item.
    pub task: Option<TaskState>,

    /// Nested blocks inside the list item.
    pub blocks: Vec<Block>,
}
169
/// State of a task-list checkbox (see [`ListItem::task`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum TaskState {
    /// Unchecked (GFM task-list item).
    Incomplete,

    /// Checked (GFM task-list item).
    Complete,
}
180
181// ——————————————————————————————————————————————————————————————————————————
182// Code blocks
183// ——————————————————————————————————————————————————————————————————————————
184
/// Fenced or indented code block: its kind plus the literal text it contains.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct CodeBlock {
    /// Distinguishes indented vs fenced code; for fenced blocks it also stores
    /// the *info string*.
    pub kind: CodeBlockKind,

    /// Literal text inside the code block **without** final newline trimming.
    pub literal: String,
}
195
/// The concrete kind of a code block (the type of [`CodeBlock::kind`]).
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum CodeBlockKind {
    /// Indented block (≥ 4 spaces or 1 tab per line). Carries no data.
    Indented,

    /// Fenced block with *optional* info string (language, etc.).
    /// `info` is `None` when the fence has no info string.
    Fenced { info: Option<String> },
}
206
207// ——————————————————————————————————————————————————————————————————————————
208// Link reference definitions
209// ——————————————————————————————————————————————————————————————————————————
210
/// Link reference definition with a label, destination and optional title.
///
/// This is the payload of [`Block::Definition`].
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct LinkDefinition {
    /// Link label (acts as the *identifier*). Stored as inline nodes rather
    /// than as a plain string.
    pub label: Vec<Inline>,

    /// Link URL (absolute or relative) or email address.
    pub destination: String,

    /// Optional title (for links and images).
    pub title: Option<String>,
}
224
225// ——————————————————————————————————————————————————————————————————————————
226// Tables
227// ——————————————————————————————————————————————————————————————————————————
228
/// A table is a collection of rows and columns with optional alignment.
/// The first row is the header row.
///
/// The relationship between `rows` and `alignments` is a documented
/// convention only: both are plain `Vec`s, so the types do not enforce that
/// their lengths agree.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Table {
    /// Each row is a vector of *cells*; header row is **row 0**.
    pub rows: Vec<TableRow>,

    /// Column alignment; `alignments.len() == column_count`.
    pub alignments: Vec<Alignment>,
}
240
/// A table row: a vector of [`TableCell`]s, one per column.
pub type TableRow = Vec<TableCell>;
243
/// A table cell: a vector of [`Inline`] nodes (text, links, etc.).
pub type TableCell = Vec<Inline>;
246
/// Specifies the alignment of a table column (one entry per column in
/// [`Table::alignments`]).
///
/// Note that the derived [`Default`] is [`Alignment::Left`] (the variant
/// marked `#[default]`), not [`Alignment::None`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Alignment {
    /// No alignment specified
    None,

    /// Left aligned (this is the `Default` value)
    #[default]
    Left,

    /// Center aligned
    Center,

    /// Right aligned
    Right,
}
264
265// ——————————————————————————————————————————————————————————————————————————
266// Footnotes
267// ——————————————————————————————————————————————————————————————————————————
268
/// Footnote definition block (e.g., `[^label]: content`).
///
/// This is the payload of [`Block::FootnoteDefinition`].
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct FootnoteDefinition {
    /// Normalized label (without leading `^`).
    pub label: String,

    /// Footnote content (blocks).
    pub blocks: Vec<Block>,
}
279
280// ——————————————————————————————————————————————————————————————————————————
281// Inline‑level nodes
282// ——————————————————————————————————————————————————————————————————————————
283
/// Inline-level elements within paragraphs, headings, and other blocks.
///
/// The type is recursive: [`Inline::Emphasis`], [`Inline::Strong`] and
/// [`Inline::Strikethrough`] wrap further inlines, as do the payloads of
/// [`Inline::Link`] and [`Inline::LinkReference`]. Unlike the block-level
/// types, `Inline` also derives `Eq` and `Hash`.
#[derive(Debug, Clone, PartialEq, Hash, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Inline {
    /// Plain text (decoded entity references, preserved backslash escapes).
    Text(String),

    /// Hard line break. Carries no data.
    LineBreak,

    /// Inline code span; the payload is the code text.
    Code(String),

    /// Raw HTML fragment, stored as a plain string.
    Html(String),

    /// Link to a destination with optional title.
    Link(Link),

    /// Reference link (label plus link text).
    LinkReference(LinkReference),

    /// Image with optional title.
    Image(Image),

    /// Emphasis (`*` / `_`) around nested inlines.
    Emphasis(Vec<Inline>),
    /// Strong emphasis (`**` / `__`) around nested inlines.
    Strong(Vec<Inline>),
    /// Strikethrough (`~~`) around nested inlines.
    Strikethrough(Vec<Inline>),

    /// Autolink (`<https://…>` or `<mailto:…>`)
    Autolink(String),

    /// Footnote reference (`[^label]`); the payload is the label.
    FootnoteReference(String),

    /// Empty element. This is used to represent skipped elements in the AST.
    Empty,
}
325
/// Inline link: destination, optional title and the inline content that forms
/// the link text. Images use the separate [`Image`] struct.
#[derive(Debug, Clone, PartialEq, Hash, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Link {
    /// Destination URL (absolute or relative) or email address.
    pub destination: String,

    /// Optional title.
    pub title: Option<String>,

    /// Inline content (text, code, etc.) inside the link.
    pub children: Vec<Inline>,
}
339
/// Inline image: destination, optional title and alternative text.
///
/// Unlike [`Link`], an image has no `children`; its description is kept as a
/// plain string in [`Image::alt`].
#[derive(Debug, Clone, PartialEq, Hash, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Image {
    /// Image URL (absolute or relative).
    pub destination: String,

    /// Optional title.
    pub title: Option<String>,

    /// Alternative text.
    pub alt: String,
}
353
/// Reference-style link (e.g., `[text][label]` or `[label][]`).
///
/// Only the label and the link text are stored here; the struct has no
/// destination or title field.
#[derive(Debug, Clone, PartialEq, Hash, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct LinkReference {
    /// Link label (acts as the *identifier*).
    pub label: Vec<Inline>,

    /// Link text, as inline nodes.
    pub text: Vec<Inline>,
}
363}