markdown_ppp/ast/mod.rs
1//! Fully‑typed Abstract Syntax Tree (AST) for CommonMark + GitHub Flavored Markdown (GFM)
2//! ------------------------------------------------------------------------------------
3//! This module models every construct described in the **CommonMark 1.0 specification**
4//! together with the widely‑used **GFM extensions**: tables, strikethrough, autolinks,
5//! task‑list items and footnotes.
6//!
7//! The design separates **block‑level** and **inline‑level** nodes because parsers and
8//! renderers typically operate on these tiers independently.
9//!
//! ```text
//! Document ─┐
//!           └─ Block ─┐
//!                     ├─ Inline
//!                     └─ ...
//! ```
16
17// ——————————————————————————————————————————————————————————————————————————
18// Document root
19// ——————————————————————————————————————————————————————————————————————————
20
/// Root of a Markdown document.
///
/// A document is nothing more than the ordered list of its top-level
/// [`Block`] nodes; all nesting lives inside those blocks.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Document {
    /// Top-level block sequence **in document order**.
    pub blocks: Vec<Block>,
}
28
29// ——————————————————————————————————————————————————————————————————————————
30// Block‑level nodes
31// ——————————————————————————————————————————————————————————————————————————
32
/// Block-level constructs in the order they appear in the CommonMark spec,
/// followed by the extension blocks (tables, footnote definitions).
///
/// Container variants ([`Block::BlockQuote`], [`Block::List`],
/// [`Block::FootnoteDefinition`]) hold further `Block`s, while leaf variants
/// hold [`Inline`] content or raw text.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Block {
    /// Ordinary paragraph made of inline content.
    Paragraph(Vec<Inline>),

    /// ATX (`# Heading`) or Setext (`===`) heading.
    Heading(Heading),

    /// Thematic break (horizontal rule). Carries no data.
    ThematicBreak,

    /// Block quote containing nested blocks.
    BlockQuote(Vec<Block>),

    /// List (bullet or ordered).
    List(List),

    /// Fenced or indented code block.
    CodeBlock(CodeBlock),

    /// Raw HTML block, stored as text.
    HtmlBlock(String),

    /// Link reference definition. Preserved for round-tripping.
    Definition(LinkDefinition),

    /// Table (header row, body rows and column alignments).
    Table(Table),

    /// Footnote definition.
    FootnoteDefinition(FootnoteDefinition),

    /// Empty block. This is used to represent skipped blocks in the AST.
    Empty,
}
70
/// Heading with level 1–6 and inline content.
///
/// NOTE(review): the `u8` type does not itself restrict `level` to 1–6;
/// code constructing a `Heading` is presumably expected to stay in range.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Heading {
    /// Heading level (1 ⇒ `<h1>`, …, 6 ⇒ `<h6>`).
    pub level: u8,

    /// Inlines that form the heading text (before trimming).
    pub content: Vec<Inline>,
}
81
82// ——————————————————————————————————————————————————————————————————————————
83// Lists
84// ——————————————————————————————————————————————————————————————————————————
85
/// A list container — bullet or ordered.
///
/// The list-wide properties live in [`ListKind`]; per-item data (task state
/// and nested blocks) lives in each [`ListItem`].
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct List {
    /// Kind of list together with additional semantic data (start index or
    /// bullet marker).
    pub kind: ListKind,

    /// List items in source order.
    pub items: Vec<ListItem>,
}
97
/// Specifies *what kind* of list we have, together with the data that only
/// makes sense for that kind.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ListKind {
    /// Ordered list (`1.`, `42.` …). The start number is always present and
    /// is carried in [`ListOrderedKindOptions::start`].
    Ordered(ListOrderedKindOptions),

    /// Bullet list (`-`, `*`, or `+`) together with the concrete marker.
    Bullet(ListBulletKind),
}
108
/// Options attached to an ordered list (the payload of `ListKind::Ordered`).
///
/// Kept as a struct rather than a bare number so that further ordered-list
/// options can be added as fields.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ListOrderedKindOptions {
    /// Start index (1, 2, …) for ordered lists, i.e. the number of the first item.
    pub start: u64,
}
116
/// Concrete bullet character used for a bullet list.
///
/// Recording the marker lets a bullet list be written back with the same
/// character it was parsed from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ListBulletKind {
    /// `-` U+002D (hyphen-minus)
    Dash,

    /// `*` U+002A (asterisk)
    Star,

    /// `+` U+002B (plus sign)
    Plus,
}
130
/// Item within a list.
///
/// An item is a block container: its content is a sequence of [`Block`]s,
/// which is how nested lists and multi-paragraph items are represented.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ListItem {
    /// Task-list checkbox state (GFM task-lists). `None` ⇒ not a task list.
    pub task: Option<TaskState>,

    /// Nested blocks inside the list item.
    pub blocks: Vec<Block>,
}
141
/// State of a task-list checkbox (GFM task-list items).
///
/// Stored in [`ListItem::task`]; an item without a checkbox has no
/// `TaskState` at all rather than a third variant here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum TaskState {
    /// Unchecked (GFM task-list item)
    Incomplete,

    /// Checked (GFM task-list item)
    Complete,
}
152
153// ——————————————————————————————————————————————————————————————————————————
154// Code blocks
155// ——————————————————————————————————————————————————————————————————————————
156
/// Fenced or indented code block.
///
/// The block's text is kept verbatim in `literal`; how the block was
/// written (and any info string) is described by `kind`.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct CodeBlock {
    /// Distinguishes indented vs fenced code and stores the *info string*.
    pub kind: CodeBlockKind,

    /// Literal text inside the code block **without** final newline trimming.
    pub literal: String,
}
167
/// The concrete kind of a code block.
///
/// Only fenced blocks can carry an info string, so it is stored on the
/// `Fenced` variant rather than on the code block itself.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum CodeBlockKind {
    /// Indented block (≥ 4 spaces or 1 tab per line).
    Indented,

    /// Fenced block with *optional* info string (language, etc.).
    Fenced {
        /// Info string following the opening fence; `None` when absent.
        info: Option<String>,
    },
}
178
179// ——————————————————————————————————————————————————————————————————————————
180// Link reference definitions
181// ——————————————————————————————————————————————————————————————————————————
182
/// Link reference definition with a label, destination and optional title.
///
/// Link reference definitions are part of core CommonMark (not a GFM
/// extension); they are what reference-style links look up by label.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct LinkDefinition {
    /// Normalized label (acts as the *identifier*).
    pub label: String,

    /// Link URL (absolute or relative) or email address.
    pub destination: String,

    /// Optional title (for links and images).
    pub title: Option<String>,
}
196
197// ——————————————————————————————————————————————————————————————————————————
198// Tables
199// ——————————————————————————————————————————————————————————————————————————
200
/// A table is a collection of rows and columns with optional alignment.
/// The first row is the header row.
///
/// NOTE(review): the types do not enforce that every row has the same
/// number of cells, nor that `alignments` matches the column count;
/// these are documented expectations only.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Table {
    /// Each row is a vector of *cells*; header row is **row 0**.
    pub rows: Vec<TableRow>,

    /// Column alignment; `alignments.len() == column_count`.
    pub alignments: Vec<Alignment>,
}
212
/// A table row is a vector of cells, one per column, in column order.
pub type TableRow = Vec<TableCell>;
215
/// A table cell is a vector of inlines (text, links, etc.); cells hold
/// inline content only, never blocks.
pub type TableCell = Vec<Inline>;
218
/// Specifies the horizontal alignment of a table column.
///
/// Alignment is stored per column in [`Table::alignments`], not per cell.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Alignment {
    /// No alignment specified
    None,

    /// Left aligned
    Left,

    /// Center aligned
    Center,

    /// Right aligned
    Right,
}
235
236// ——————————————————————————————————————————————————————————————————————————
237// Footnotes
238// ——————————————————————————————————————————————————————————————————————————
239
/// Footnote definition: the block content a footnote reference with the
/// same label points to.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct FootnoteDefinition {
    /// Normalized label (without leading `^`).
    pub label: String,

    /// Footnote content (blocks).
    pub blocks: Vec<Block>,
}
249
250// ——————————————————————————————————————————————————————————————————————————
251// Inline‑level nodes
252// ——————————————————————————————————————————————————————————————————————————
253
/// Inline-level constructs: the content of paragraphs, headings, table
/// cells and links.
///
/// Variants holding `Vec<Inline>` (emphasis, strong, strikethrough, and the
/// children of links and images) nest further inlines; the rest are leaves.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Inline {
    /// Plain text (decoded entity references, preserved backslash escapes).
    Text(String),

    /// Hard line break
    LineBreak,

    /// Inline code span
    Code(String),

    /// Raw HTML fragment
    Html(String),

    /// Link to a destination with optional title.
    Link(Link),

    /// Reference link
    LinkReference(LinkReference),

    /// Image with optional title. Shares the [`Link`] structure with
    /// `Inline::Link`.
    Image(Link),

    /// Emphasis (`*` / `_`)
    Emphasis(Vec<Inline>),
    /// Strong emphasis (`**` / `__`)
    Strong(Vec<Inline>),
    /// Strikethrough (`~~`)
    Strikethrough(Vec<Inline>),

    /// Autolink (`<https://>` or `<mailto:…>`)
    Autolink(String),

    /// Footnote reference (`[^label]`)
    FootnoteReference(String),

    /// Empty element. This is used to represent skipped elements in the AST.
    Empty,
}
294
/// Re-usable structure for links and images (destination + children).
///
/// Used as the payload of both `Inline::Link` and `Inline::Image`; the enum
/// variant, not this struct, tells the two apart.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Link {
    /// Destination URL (absolute or relative) or email address.
    pub destination: String,

    /// Optional title (for links and images).
    pub title: Option<String>,

    /// Inline content (text, code, etc.) inside the link or image.
    pub children: Vec<Inline>,
}
308
/// Reference-style link: refers to its target by label instead of carrying
/// a destination inline.
///
/// Unlike [`Link`], the link text is stored as a plain `String`, not as
/// nested inlines.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct LinkReference {
    /// Normalized label (acts as the *identifier*).
    pub label: String,

    /// Link text
    pub text: String,
}
317}