markdown_ppp/ast/
generic.rs

1//! Generic Abstract Syntax Tree (AST) for CommonMark + GitHub Flavored Markdown (GFM)
2//! =====================================================================================
3//!
4//! This module provides generic versions of all AST structures that allow attaching
5//! user-defined data to any AST node. The generic parameter `T` represents the type
6//! of user data that can be associated with each element.
7//!
8//! # Features
9//!
10//! - **Zero-cost abstraction**: When `T = ()`, no additional memory is used
11//! - **Flexible user data**: Support for any user-defined type
//! - **Serde compatibility**: With the `ast-serde` feature, nodes derive `Serialize`/`Deserialize`, and a missing `user_data` field deserializes to `T::default()`
13//! - **Type safety**: Compile-time guarantees about data presence
14//!
15//! # Examples
16//!
17//! ```rust
18//! use markdown_ppp::ast::generic::*;
19//!
20//! // AST without user data (equivalent to regular AST)
21//! type SimpleDocument = Document<()>;
22//!
23//! // AST with element IDs
24//! #[derive(Debug, Clone, PartialEq)]
25//! struct ElementId(u32);
26//! type DocumentWithIds = Document<ElementId>;
27//!
28//! // AST with source information
29//! #[derive(Debug, Clone, PartialEq)]
30//! struct SourceInfo {
31//!     line: u32,
32//!     column: u32,
33//! }
34//! type DocumentWithSource = Document<SourceInfo>;
35//! ```
36
37// Re-export types from parent module that don't need generics
38pub use super::{
39    Alignment, CodeBlockKind, GitHubAlert, GitHubAlertType, HeadingKind, ListBulletKind,
40    ListOrderedKindOptions, SetextHeading, TaskState,
41};
42
43// ——————————————————————————————————————————————————————————————————————————
44// Document root
45// ——————————————————————————————————————————————————————————————————————————
46
47/// Root of a Markdown document with optional user data
48#[derive(Debug, Clone, PartialEq)]
49#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
50pub struct Document<T = ()> {
51    /// Top‑level block sequence **in document order**.
52    pub blocks: Vec<Block<T>>,
53
54    /// User-defined data associated with this document
55    #[cfg_attr(feature = "ast-serde", serde(default))]
56    pub user_data: T,
57}
58
59// ——————————————————————————————————————————————————————————————————————————
60// Block‑level nodes
61// ——————————————————————————————————————————————————————————————————————————
62
63/// Block‑level constructs in the order they appear in the CommonMark spec.
64#[derive(Debug, Clone, PartialEq)]
65#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
66pub enum Block<T = ()> {
67    /// Ordinary paragraph
68    Paragraph {
69        content: Vec<Inline<T>>,
70        #[cfg_attr(feature = "ast-serde", serde(default))]
71        user_data: T,
72    },
73
74    /// ATX (`# Heading`) or Setext (`===`) heading
75    Heading(Heading<T>),
76
77    /// Thematic break (horizontal rule)
78    ThematicBreak {
79        #[cfg_attr(feature = "ast-serde", serde(default))]
80        user_data: T,
81    },
82
83    /// Block quote
84    BlockQuote {
85        blocks: Vec<Block<T>>,
86        #[cfg_attr(feature = "ast-serde", serde(default))]
87        user_data: T,
88    },
89
90    /// List (bullet or ordered)
91    List(List<T>),
92
93    /// Fenced or indented code block
94    CodeBlock(CodeBlock<T>),
95
96    /// Raw HTML block
97    HtmlBlock {
98        content: String,
99        #[cfg_attr(feature = "ast-serde", serde(default))]
100        user_data: T,
101    },
102
103    /// Link reference definition. Preserved for round‑tripping.
104    Definition(LinkDefinition<T>),
105
106    /// Tables
107    Table(Table<T>),
108
109    /// Footnote definition
110    FootnoteDefinition(FootnoteDefinition<T>),
111
112    /// GitHub alert block (NOTE, TIP, IMPORTANT, WARNING, CAUTION)
113    GitHubAlert(GitHubAlertNode<T>),
114
115    /// Empty block. This is used to represent skipped blocks in the AST.
116    Empty {
117        #[cfg_attr(feature = "ast-serde", serde(default))]
118        user_data: T,
119    },
120}
121
122/// Heading with level 1–6 and inline content.
123#[derive(Debug, Clone, PartialEq)]
124#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
125pub struct Heading<T = ()> {
126    /// Kind of heading (ATX or Setext) together with the level.
127    pub kind: HeadingKind,
128
129    /// Inlines that form the heading text (before trimming).
130    pub content: Vec<Inline<T>>,
131
132    /// User-defined data associated with this heading
133    #[cfg_attr(feature = "ast-serde", serde(default))]
134    pub user_data: T,
135}
136
137// ——————————————————————————————————————————————————————————————————————————
138// Lists
139// ——————————————————————————————————————————————————————————————————————————
140
141/// A list container — bullet or ordered.
142#[derive(Debug, Clone, PartialEq)]
143#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
144pub struct List<T = ()> {
145    /// Kind of list together with additional semantic data (start index or
146    /// bullet marker).
147    pub kind: ListKind,
148
149    /// List items in source order.
150    pub items: Vec<ListItem<T>>,
151
152    /// User-defined data associated with this list
153    #[cfg_attr(feature = "ast-serde", serde(default))]
154    pub user_data: T,
155}
156
157/// Specifies *what kind* of list we have.
158#[derive(Debug, Clone, PartialEq)]
159#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
160pub enum ListKind {
161    /// Ordered list (`1.`, `42.` …) with an *optional* explicit start number.
162    Ordered(ListOrderedKindOptions),
163
164    /// Bullet list (`-`, `*`, or `+`) together with the concrete marker.
165    Bullet(ListBulletKind),
166}
167
168/// Item within a list.
169#[derive(Debug, Clone, PartialEq)]
170#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
171pub struct ListItem<T = ()> {
172    /// Task‑list checkbox state (GFM task‑lists). `None` ⇒ not a task list.
173    pub task: Option<TaskState>,
174
175    /// Nested blocks inside the list item.
176    pub blocks: Vec<Block<T>>,
177
178    /// User-defined data associated with this list item
179    #[cfg_attr(feature = "ast-serde", serde(default))]
180    pub user_data: T,
181}
182
183// ——————————————————————————————————————————————————————————————————————————
184// Code blocks
185// ——————————————————————————————————————————————————————————————————————————
186
187/// Fenced or indented code block.
188#[derive(Debug, Clone, PartialEq)]
189#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
190pub struct CodeBlock<T = ()> {
191    /// Distinguishes indented vs fenced code and stores the *info string*.
192    pub kind: CodeBlockKind,
193
194    /// Literal text inside the code block **without** final newline trimming.
195    pub literal: String,
196
197    /// User-defined data associated with this code block
198    #[cfg_attr(feature = "ast-serde", serde(default))]
199    pub user_data: T,
200}
201
202// ——————————————————————————————————————————————————————————————————————————
203// Link reference definitions
204// ——————————————————————————————————————————————————————————————————————————
205
206/// Link reference definition (GFM) with a label, destination and optional title.
207#[derive(Debug, Clone, PartialEq)]
208#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
209pub struct LinkDefinition<T = ()> {
210    /// Link label (acts as the *identifier*).
211    pub label: Vec<Inline<T>>,
212
213    /// Link URL (absolute or relative) or email address.
214    pub destination: String,
215
216    /// Optional title (for links and images).
217    pub title: Option<String>,
218
219    /// User-defined data associated with this link definition
220    #[cfg_attr(feature = "ast-serde", serde(default))]
221    pub user_data: T,
222}
223
224// ——————————————————————————————————————————————————————————————————————————
225// Tables
226// ——————————————————————————————————————————————————————————————————————————
227
228/// A table is a collection of rows and columns with optional alignment.
229/// The first row is the header row.
230#[derive(Debug, Clone, PartialEq)]
231#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
232pub struct Table<T = ()> {
233    /// Each row is a vector of *cells*; header row is **row 0**.
234    pub rows: Vec<TableRow<T>>,
235
236    /// Column alignment; `alignments.len() == column_count`.
237    pub alignments: Vec<Alignment>,
238
239    /// User-defined data associated with this table
240    #[cfg_attr(feature = "ast-serde", serde(default))]
241    pub user_data: T,
242}
243
244/// A table row is a vector of cells (columns).
245pub type TableRow<T> = Vec<TableCell<T>>;
246
247/// A table cell is a vector of inlines (text, links, etc.).
248pub type TableCell<T> = Vec<Inline<T>>;
249
250// ——————————————————————————————————————————————————————————————————————————
251// Footnotes
252// ——————————————————————————————————————————————————————————————————————————
253
254/// Footnote definition block (e.g., `[^label]: content`).
255#[derive(Debug, Clone, PartialEq)]
256#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
257pub struct FootnoteDefinition<T = ()> {
258    /// Normalized label (without leading `^`).
259    pub label: String,
260
261    /// Footnote content (blocks).
262    pub blocks: Vec<Block<T>>,
263
264    /// User-defined data associated with this footnote definition
265    #[cfg_attr(feature = "ast-serde", serde(default))]
266    pub user_data: T,
267}
268
269// ——————————————————————————————————————————————————————————————————————————
270// GitHub Alerts
271// ——————————————————————————————————————————————————————————————————————————
272
273/// GitHub alert block with user data support
274#[derive(Debug, Clone, PartialEq)]
275#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
276pub struct GitHubAlertNode<T = ()> {
277    /// Type of alert (NOTE, TIP, IMPORTANT, WARNING, CAUTION)
278    pub alert_type: GitHubAlertType,
279
280    /// Content blocks within the alert
281    pub blocks: Vec<Block<T>>,
282
283    /// User-defined data associated with this GitHub alert
284    #[cfg_attr(feature = "ast-serde", serde(default))]
285    pub user_data: T,
286}
287
288// ——————————————————————————————————————————————————————————————————————————
289// Inline‑level nodes
290// ——————————————————————————————————————————————————————————————————————————
291
292/// Inline-level elements within paragraphs, headings, and other blocks.
293#[derive(Debug, Clone, PartialEq)]
294#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
295pub enum Inline<T = ()> {
296    /// Plain text (decoded entity references, preserved backslash escapes).
297    Text {
298        content: String,
299        #[cfg_attr(feature = "ast-serde", serde(default))]
300        user_data: T,
301    },
302
303    /// Hard line break
304    LineBreak {
305        #[cfg_attr(feature = "ast-serde", serde(default))]
306        user_data: T,
307    },
308
309    /// Inline code span
310    Code {
311        content: String,
312        #[cfg_attr(feature = "ast-serde", serde(default))]
313        user_data: T,
314    },
315
316    /// Raw HTML fragment
317    Html {
318        content: String,
319        #[cfg_attr(feature = "ast-serde", serde(default))]
320        user_data: T,
321    },
322
323    /// Link to a destination with optional title.
324    Link(Link<T>),
325
326    /// Reference link
327    LinkReference(LinkReference<T>),
328
329    /// Image with optional title.
330    Image(Image<T>),
331
332    /// Emphasis (`*` / `_`)
333    Emphasis {
334        content: Vec<Inline<T>>,
335        #[cfg_attr(feature = "ast-serde", serde(default))]
336        user_data: T,
337    },
338
339    /// Strong emphasis (`**` / `__`)
340    Strong {
341        content: Vec<Inline<T>>,
342        #[cfg_attr(feature = "ast-serde", serde(default))]
343        user_data: T,
344    },
345
346    /// Strikethrough (`~~`)
347    Strikethrough {
348        content: Vec<Inline<T>>,
349        #[cfg_attr(feature = "ast-serde", serde(default))]
350        user_data: T,
351    },
352
353    /// Autolink (`<https://>` or `<mailto:…>`)
354    Autolink {
355        url: String,
356        #[cfg_attr(feature = "ast-serde", serde(default))]
357        user_data: T,
358    },
359
360    /// Footnote reference (`[^label]`)
361    FootnoteReference {
362        label: String,
363        #[cfg_attr(feature = "ast-serde", serde(default))]
364        user_data: T,
365    },
366
367    /// Empty element. This is used to represent skipped elements in the AST.
368    Empty {
369        #[cfg_attr(feature = "ast-serde", serde(default))]
370        user_data: T,
371    },
372}
373
374/// Re‑usable structure for links and images (destination + children).
375#[derive(Debug, Clone, PartialEq)]
376#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
377pub struct Link<T = ()> {
378    /// Destination URL (absolute or relative) or email address.
379    pub destination: String,
380
381    /// Optional title (for links and images).
382    pub title: Option<String>,
383
384    /// Inline content (text, code, etc.) inside the link or image.
385    pub children: Vec<Inline<T>>,
386
387    /// User-defined data associated with this link
388    #[cfg_attr(feature = "ast-serde", serde(default))]
389    pub user_data: T,
390}
391
/// An image: a destination, an optional title and alternative text.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Image<T = ()> {
    /// URL of the image (absolute or relative).
    pub destination: String,

    /// The title, if one was given.
    pub title: Option<String>,

    /// The alternative text.
    pub alt: String,

    /// Caller-supplied payload attached to this image.
    #[cfg_attr(feature = "ast-serde", serde(default))]
    pub user_data: T,
}
409
410/// Reference-style link (e.g., `[text][label]` or `[label][]`).
411#[derive(Debug, Clone, PartialEq)]
412#[cfg_attr(feature = "ast-serde", derive(serde::Serialize, serde::Deserialize))]
413pub struct LinkReference<T = ()> {
414    /// Link label (acts as the *identifier*).
415    pub label: Vec<Inline<T>>,
416
417    /// Link text
418    pub text: Vec<Inline<T>>,
419
420    /// User-defined data associated with this link reference
421    #[cfg_attr(feature = "ast-serde", serde(default))]
422    pub user_data: T,
423}
424
425// ——————————————————————————————————————————————————————————————————————————
426// Default implementations for common cases
427// ——————————————————————————————————————————————————————————————————————————
428
429impl<T: Default> Default for Document<T> {
430    fn default() -> Self {
431        Self {
432            blocks: Vec::new(),
433            user_data: T::default(),
434        }
435    }
436}
437
438impl<T: Default> Default for Heading<T> {
439    fn default() -> Self {
440        Self {
441            kind: HeadingKind::Atx(1),
442            content: Vec::new(),
443            user_data: T::default(),
444        }
445    }
446}
447
448impl<T: Default> Default for List<T> {
449    fn default() -> Self {
450        Self {
451            kind: ListKind::Bullet(ListBulletKind::Dash),
452            items: Vec::new(),
453            user_data: T::default(),
454        }
455    }
456}
457
458impl<T: Default> Default for Table<T> {
459    fn default() -> Self {
460        Self {
461            rows: Vec::new(),
462            alignments: Vec::new(),
463            user_data: T::default(),
464        }
465    }
466}