markdown_ppp/ast/
mod.rs

1//! Fully‑typed Abstract Syntax Tree (AST) for CommonMark + GitHub Flavored Markdown (GFM)
2//! ------------------------------------------------------------------------------------
3//! This module models the constructs described in the **CommonMark specification**
4//! together with the widely‑used **GFM extensions**: tables, strikethrough, autolinks,
5//! task‑list items and footnotes, plus GitHub alert blocks.
6//!
7//! The design separates **block‑level** and **inline‑level** nodes because parsers and
8//! renderers typically operate on these tiers independently.
9//!
10//! ```text
11//! Document ─┐
12//!           └─ Block ─┐
13//!                     ├─ Inline
14//!                     └─ ...
15//! ```
16
17mod github_alerts;
18pub use github_alerts::{GitHubAlert, GitHubAlertType};
19
20// ——————————————————————————————————————————————————————————————————————————
21// Document root
22// ——————————————————————————————————————————————————————————————————————————
23
24/// Root of a Markdown document: a flat sequence of top‑level [`Block`]s.
25#[derive(Debug, Clone, PartialEq)]
26#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
27pub struct Document {
28    /// Top‑level block sequence **in document order**.
29    pub blocks: Vec<Block>,
30}
31
32// ——————————————————————————————————————————————————————————————————————————
33// Block‑level nodes
34// ——————————————————————————————————————————————————————————————————————————
35
36/// Block‑level nodes: the CommonMark blocks, the extension blocks (tables, footnotes, GitHub alerts) and an `Empty` placeholder.
37#[derive(Debug, Clone, PartialEq)]
38#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
39pub enum Block {
40    /// Ordinary paragraph made of inline content.
41    Paragraph(Vec<Inline>),
42
43    /// ATX (`# Heading`) or Setext (`===` / `---` underline) heading
44    Heading(Heading),
45
46    /// Thematic break (horizontal rule)
47    ThematicBreak,
48
49    /// Block quote containing nested blocks.
50    BlockQuote(Vec<Block>),
51
52    /// List (bullet or ordered)
53    List(List),
54
55    /// Fenced or indented code block
56    CodeBlock(CodeBlock),
57
58    /// Raw HTML block, kept as a string.
59    HtmlBlock(String),
60
61    /// Link reference definition.  Preserved for round‑tripping.
62    Definition(LinkDefinition),
63
64    /// Table (GFM extension)
65    Table(Table),
66
67    /// Footnote definition
68    FootnoteDefinition(FootnoteDefinition),
69
70    /// GitHub alert block (NOTE, TIP, IMPORTANT, WARNING, CAUTION)
71    GitHubAlert(GitHubAlert),
72
73    /// Empty block. This is used to represent skipped blocks in the AST.
74    Empty,
75}
76
77/// Heading with level 1–6 and inline content; the level is carried by `kind`.
78#[derive(Debug, Clone, PartialEq)]
79#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
80pub struct Heading {
81    /// Kind of heading (ATX or Setext) together with the level.
82    pub kind: HeadingKind,
83
84    /// Inlines that form the heading text (before trimming).
85    pub content: Vec<Inline>,
86}
87
88/// Syntax used to write a heading (ATX or Setext) together with its level.
89#[derive(Debug, Clone, PartialEq)]
90#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
91pub enum HeadingKind {
92    /// ATX heading (`# Heading`); the payload is the heading level (a plain `u8`, so the 1–6 range is not enforced by the type).
93    Atx(u8),
94
95    /// Setext heading (`===` or `---` underline); the level is given by the [`SetextHeading`] variant.
96    Setext(SetextHeading),
97}
98
99/// Level of a Setext heading, which is determined by its underline character.
100#[derive(Debug, Clone, PartialEq)]
101#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
102pub enum SetextHeading {
103    /// Level‑1 Setext heading (`=` underline)
104    Level1,
105
106    /// Level‑2 Setext heading (`-` underline)
107    Level2,
108}
109
110// ——————————————————————————————————————————————————————————————————————————
111// Lists
112// ——————————————————————————————————————————————————————————————————————————
113
114/// A list container — bullet or ordered — holding its items.
115#[derive(Debug, Clone, PartialEq)]
116#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
117pub struct List {
118    /// Kind of list together with additional semantic data (start index for
119    /// ordered lists, bullet marker for bullet lists).
120    pub kind: ListKind,
121
122    /// List items in source order.
123    pub items: Vec<ListItem>,
124}
125
126/// Specifies *what kind* of list we have, with the data specific to that kind.
127#[derive(Debug, Clone, PartialEq)]
128#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
129pub enum ListKind {
130    /// Ordered list (`1.`, `42.` …) together with its start number.
131    Ordered(ListOrderedKindOptions),
132
133    /// Bullet list (`-`, `*`, or `+`) together with the concrete marker.
134    Bullet(ListBulletKind),
135}
136
137/// Options that apply only to ordered lists (the payload of [`ListKind::Ordered`]).
138#[derive(Debug, Clone, PartialEq)]
139#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
140pub struct ListOrderedKindOptions {
141    /// Start index (1, 2, …) for ordered lists; always present, not optional.
142    pub start: u64,
143}
144
145/// Concrete marker character used by a bullet list (the payload of [`ListKind::Bullet`]).
146#[derive(Debug, Clone, Copy, PartialEq, Eq)]
147#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
148pub enum ListBulletKind {
149    /// `-` (U+002D, hyphen‑minus)
150    Dash,
151
152    /// `*` (U+002A, asterisk)
153    Star,
154
155    /// `+` (U+002B, plus sign)
156    Plus,
157}
158
159/// A single item of a [`List`], optionally carrying a task‑list checkbox.
160#[derive(Debug, Clone, PartialEq)]
161#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
162pub struct ListItem {
163    /// Task‑list checkbox state (GFM task‑lists). `None` ⇒ not a task‑list item.
164    pub task: Option<TaskState>,
165
166    /// Blocks nested inside the list item.
167    pub blocks: Vec<Block>,
168}
169
170/// State of a GFM task‑list checkbox.
171#[derive(Debug, Clone, Copy, PartialEq, Eq)]
172#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
173pub enum TaskState {
174    /// Unchecked item (`[ ]`)
175    Incomplete,
176
177    /// Checked item (`[x]`)
178    Complete,
179}
180
181// ——————————————————————————————————————————————————————————————————————————
182// Code blocks
183// ——————————————————————————————————————————————————————————————————————————
184
185/// Fenced or indented code block: its kind plus the literal text.
186#[derive(Debug, Clone, PartialEq)]
187#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
188pub struct CodeBlock {
189    /// Distinguishes indented vs fenced code; the fenced variant stores the *info string*.
190    pub kind: CodeBlockKind,
191
192    /// Literal text inside the code block **without** final newline trimming.
193    pub literal: String,
194}
195
196/// The concrete kind of a code block: indented or fenced.
197#[derive(Debug, Clone, PartialEq)]
198#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
199pub enum CodeBlockKind {
200    /// Indented block (≥ 4 spaces or 1 tab per line); carries no extra data.
201    Indented,
202
203    /// Fenced block with *optional* info string (language, etc.).
204    Fenced {
205        /// Info string containing language identifier and other metadata; `None` when the fence has none.
206        info: Option<String>,
207    },
208}
209
210// ——————————————————————————————————————————————————————————————————————————
211// Link reference definitions
212// ——————————————————————————————————————————————————————————————————————————
213
214/// Link reference definition (`[label]: destination "title"`) with a label, destination and optional title.
215#[derive(Debug, Clone, PartialEq)]
216#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
217pub struct LinkDefinition {
218    /// Link label (acts as the *identifier*), stored as inline nodes.
219    pub label: Vec<Inline>,
220
221    /// Link URL (absolute or relative) or email address.
222    pub destination: String,
223
224    /// Optional title; `None` when the definition has none.
225    pub title: Option<String>,
226}
227
228// ——————————————————————————————————————————————————————————————————————————
229// Tables
230// ——————————————————————————————————————————————————————————————————————————
231
232/// A table: a collection of rows plus one alignment entry per column.
233/// The first row is the header row.
234#[derive(Debug, Clone, PartialEq)]
235#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
236pub struct Table {
237    /// Each row is a vector of *cells*; header row is **row 0**.
238    pub rows: Vec<TableRow>,
239
240    /// Column alignment; expected invariant `alignments.len() == column_count` (not enforced by the type).
241    pub alignments: Vec<Alignment>,
242}
243
244/// A table row: one [`TableCell`] per column, in column order.
245pub type TableRow = Vec<TableCell>;
246
247/// A table cell: the sequence of inlines (text, links, etc.) it contains.
248pub type TableCell = Vec<Inline>;
249
250/// Specifies the alignment of a table column. Note that `Default` yields `Left`, not `None`.
251#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
252#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
253pub enum Alignment {
254    /// No alignment specified
255    None,
256
257    /// Left aligned (the `Default` value)
258    #[default]
259    Left,
260
261    /// Center aligned
262    Center,
263
264    /// Right aligned
265    Right,
266}
267
268// ——————————————————————————————————————————————————————————————————————————
269// Footnotes
270// ——————————————————————————————————————————————————————————————————————————
271
272#[derive(Debug, Clone, PartialEq)]
273/// Footnote definition block (e.g., `[^label]: content`): a label plus the blocks forming the note.
274#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
275pub struct FootnoteDefinition {
276    /// Normalized label (without leading `^`).
277    pub label: String,
278
279    /// Footnote content (blocks).
280    pub blocks: Vec<Block>,
281}
282
283// ——————————————————————————————————————————————————————————————————————————
284// Inline‑level nodes
285// ——————————————————————————————————————————————————————————————————————————
286
287/// Inline-level elements within paragraphs, headings, table cells and other inline containers.
288#[derive(Debug, Clone, PartialEq, Hash, Eq)]
289#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
290pub enum Inline {
291    /// Plain text (decoded entity references, preserved backslash escapes).
292    Text(String),
293
294    /// Hard line break
295    LineBreak,
296
297    /// Inline code span; the payload is the code text.
298    Code(String),
299
300    /// Raw HTML fragment, kept as a string.
301    Html(String),
302
303    /// Inline link to a destination with optional title.
304    Link(Link),
305
306    /// Reference link (label plus link text)
307    LinkReference(LinkReference),
308
309    /// Image with optional title and alternative text.
310    Image(Image),
311
312    /// Emphasis (`*` / `_`) wrapping nested inlines
313    Emphasis(Vec<Inline>),
314    /// Strong emphasis (`**` / `__`) wrapping nested inlines
315    Strong(Vec<Inline>),
316    /// Strikethrough (`~~`) wrapping nested inlines
317    Strikethrough(Vec<Inline>),
318
319    /// Autolink (`<https://…>` or `<mailto:…>`)
320    Autolink(String),
321
322    /// Footnote reference (`[^label]`); the payload is the label.
323    FootnoteReference(String),
324
325    /// Empty element. This is used to represent skipped elements in the AST.
326    Empty,
327}
328
329/// Inline link: destination, optional title and the inline content that forms the link text.
330#[derive(Debug, Clone, PartialEq, Hash, Eq)]
331#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
332pub struct Link {
333    /// Destination URL (absolute or relative) or email address.
334    pub destination: String,
335
336    /// Optional link title; `None` when absent.
337    pub title: Option<String>,
338
339    /// Inline content (text, code, etc.) inside the link.
340    pub children: Vec<Inline>,
341}
342
343/// Inline image: destination, optional title and alternative text (stored as a plain string, not inlines).
344#[derive(Debug, Clone, PartialEq, Hash, Eq)]
345#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
346pub struct Image {
347    /// Image URL (absolute or relative).
348    pub destination: String,
349
350    /// Optional title; `None` when absent.
351    pub title: Option<String>,
352
353    /// Alternative text.
354    pub alt: String,
355}
356
357/// Reference-style link (e.g., `[text][label]` or `[label][]`), kept unresolved as label plus text.
358#[derive(Debug, Clone, PartialEq, Hash, Eq)]
359#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
360pub struct LinkReference {
361    /// Link label (acts as the *identifier*).
362    pub label: Vec<Inline>,
363
364    /// Link text (the inline content shown for the link).
365    pub text: Vec<Inline>,
366}