// Source: litedoc_core/ast.rs

1//! Abstract Syntax Tree types for LiteDoc documents.
2//!
3//! This module contains all the AST node types produced by the parser.
4//! The AST is designed to be:
5//!
6//! - **Zero-copy**: Uses `Cow<'a, str>` to borrow from input when possible
7//! - **Span-tracked**: Every node includes source location information
8//! - **Comprehensive**: Supports all LiteDoc and Markdown constructs
9
10use crate::span::Span;
11
/// Parsing profile that determines syntax rules.
///
/// The profile affects how the parser interprets certain constructs
/// and which features are enabled by default.
///
/// The type is a plain field-less enum, so it is `Copy` and can be used
/// as a key in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Profile {
    /// Full LiteDoc syntax with explicit fencing.
    ///
    /// This is the native format optimized for AI consumption.
    Litedoc,
    /// CommonMark with GFM extensions (tables, strikethrough, autolinks).
    Md,
    /// Strict CommonMark compliance only.
    MdStrict,
}

/// Optional modules that extend parser capabilities.
///
/// Modules can be enabled via the `@modules` directive or parser configuration.
///
/// The type is a plain field-less enum, so it is `Copy` and can be stored
/// in a `HashSet` to track the enabled set without duplicates.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Module {
    /// GFM-style tables with `|` delimiters.
    Tables,
    /// Footnote definitions and references.
    Footnotes,
    /// LaTeX-style math blocks.
    Math,
    /// Task list items with `[ ]` and `[x]` checkboxes.
    Tasks,
    /// `~~strikethrough~~` syntax.
    Strikethrough,
    /// Automatic URL detection and linking.
    Autolink,
    /// Raw HTML pass-through blocks.
    Html,
}

49/// A parsed LiteDoc document.
50///
51/// The document is the root of the AST and contains all parsed content.
52/// It preserves the parsing profile, enabled modules, optional metadata,
53/// and all content blocks.
54#[derive(Debug, Clone, PartialEq)]
55pub struct Document<'a> {
56    /// The parsing profile used (may differ from parser default if `@profile` directive present).
57    pub profile: Profile,
58    /// Enabled modules from `@modules` directive.
59    pub modules: Vec<Module>,
60    /// Optional metadata from `--- meta` block.
61    pub metadata: Option<Metadata<'a>>,
62    /// Content blocks in document order.
63    pub blocks: Vec<Block<'a>>,
64    /// Source span covering the entire document.
65    pub span: Span,
66}
67
68/// Document metadata from the `--- meta` block.
69///
70/// Metadata provides key-value pairs for document properties like
71/// title, author, date, tags, etc.
72#[derive(Debug, Clone, PartialEq)]
73pub struct Metadata<'a> {
74    /// Key-value entries in declaration order.
75    pub entries: Vec<(CowStr<'a>, AttrValue<'a>)>,
76    /// Source span of the metadata block.
77    pub span: Span,
78}
79
80/// Typed attribute values for metadata entries.
81///
82/// Values are automatically parsed into appropriate types:
83/// - Quoted strings → `Str`
84/// - `true`/`false` → `Bool`
85/// - Integers → `Int`
86/// - Decimals → `Float`
87/// - `[a, b, c]` → `List`
88#[derive(Debug, Clone, PartialEq)]
89pub enum AttrValue<'a> {
90    /// String value (quotes stripped).
91    Str(CowStr<'a>),
92    /// Boolean value.
93    Bool(bool),
94    /// 64-bit signed integer.
95    Int(i64),
96    /// 64-bit floating point.
97    Float(f64),
98    /// Nested list of values.
99    List(Vec<AttrValue<'a>>),
100}
101
102/// Block-level AST nodes.
103///
104/// Blocks are the primary structural elements of a document.
105/// Each variant represents a distinct block type with its own structure.
106#[derive(Debug, Clone, PartialEq)]
107pub enum Block<'a> {
108    /// Section heading (levels 1-6).
109    Heading(Heading<'a>),
110    /// Text paragraph with inline formatting.
111    Paragraph(Paragraph<'a>),
112    /// Ordered or unordered list.
113    List(List<'a>),
114    /// Fenced code block with optional language.
115    CodeBlock(CodeBlock<'a>),
116    /// Callout/admonition block (note, warning, etc.).
117    Callout(Callout<'a>),
118    /// Block quotation.
119    Quote(Quote<'a>),
120    /// Figure with image and caption.
121    Figure(Figure<'a>),
122    /// Data table with rows and cells.
123    Table(Table<'a>),
124    /// Footnote definitions.
125    Footnotes(Footnotes<'a>),
126    /// Mathematical equation (inline or display).
127    Math(MathBlock<'a>),
128    /// Horizontal rule / thematic break.
129    ThematicBreak(Span),
130    /// Raw HTML content (when HTML module enabled).
131    Html(HtmlBlock<'a>),
132    /// Unparsed/unknown block content (error recovery).
133    Raw(RawBlock<'a>),
134}
135
136/// Section heading with level and inline content.
137#[derive(Debug, Clone, PartialEq)]
138pub struct Heading<'a> {
139    /// Heading level (1-6).
140    pub level: u8,
141    /// Inline content (may include formatting).
142    pub content: Vec<Inline<'a>>,
143    /// Source span.
144    pub span: Span,
145}
146
147/// Text paragraph containing inline elements.
148#[derive(Debug, Clone, PartialEq)]
149pub struct Paragraph<'a> {
150    /// Inline content with formatting.
151    pub content: Vec<Inline<'a>>,
152    /// Source span.
153    pub span: Span,
154}
155
/// List ordering style.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ListKind {
    /// Numbered list (1. 2. 3.).
    Ordered,
    /// Bulleted list (- or *).
    Unordered,
}

165/// A list block containing multiple items.
166#[derive(Debug, Clone, PartialEq)]
167pub struct List<'a> {
168    /// Ordered or unordered.
169    pub kind: ListKind,
170    /// Starting number for ordered lists.
171    pub start: Option<u64>,
172    /// List items.
173    pub items: Vec<ListItem<'a>>,
174    /// Source span.
175    pub span: Span,
176}
177
178/// A single list item (may contain nested blocks).
179#[derive(Debug, Clone, PartialEq)]
180pub struct ListItem<'a> {
181    /// Content blocks within the item.
182    pub blocks: Vec<Block<'a>>,
183    /// Source span.
184    pub span: Span,
185}
186
187/// Fenced code block with syntax highlighting hint.
188#[derive(Debug, Clone, PartialEq)]
189pub struct CodeBlock<'a> {
190    /// Language identifier (e.g., "rust", "python").
191    pub lang: CowStr<'a>,
192    /// Raw code content.
193    pub content: CowStr<'a>,
194    /// Source span.
195    pub span: Span,
196}
197
198/// Callout/admonition block for notes, warnings, etc.
199#[derive(Debug, Clone, PartialEq)]
200pub struct Callout<'a> {
201    /// Callout type (note, warning, info, tip, etc.).
202    pub kind: CowStr<'a>,
203    /// Optional title override.
204    pub title: Option<CowStr<'a>>,
205    /// Content blocks.
206    pub blocks: Vec<Block<'a>>,
207    /// Source span.
208    pub span: Span,
209}
210
211/// Block quotation.
212#[derive(Debug, Clone, PartialEq)]
213pub struct Quote<'a> {
214    /// Quoted content blocks.
215    pub blocks: Vec<Block<'a>>,
216    /// Source span.
217    pub span: Span,
218}
219
220/// Figure with image and optional caption.
221#[derive(Debug, Clone, PartialEq)]
222pub struct Figure<'a> {
223    /// Image source URL or path.
224    pub src: CowStr<'a>,
225    /// Alt text for accessibility.
226    pub alt: CowStr<'a>,
227    /// Optional figure caption.
228    pub caption: Option<CowStr<'a>>,
229    /// Source span.
230    pub span: Span,
231}
232
233/// Data table with header and body rows.
234#[derive(Debug, Clone, PartialEq)]
235pub struct Table<'a> {
236    /// All table rows (first may be header).
237    pub rows: Vec<TableRow<'a>>,
238    /// Source span.
239    pub span: Span,
240}
241
242/// A single table row.
243#[derive(Debug, Clone, PartialEq)]
244pub struct TableRow<'a> {
245    /// Cells in this row.
246    pub cells: Vec<TableCell<'a>>,
247    /// Whether this is a header row.
248    pub header: bool,
249    /// Source span.
250    pub span: Span,
251}
252
253/// A single table cell.
254#[derive(Debug, Clone, PartialEq)]
255pub struct TableCell<'a> {
256    /// Cell content (inline elements).
257    pub content: Vec<Inline<'a>>,
258    /// Source span.
259    pub span: Span,
260}
261
262/// Container for footnote definitions.
263#[derive(Debug, Clone, PartialEq)]
264pub struct Footnotes<'a> {
265    /// Footnote definitions.
266    pub defs: Vec<FootnoteDef<'a>>,
267    /// Source span.
268    pub span: Span,
269}
270
271/// A single footnote definition.
272#[derive(Debug, Clone, PartialEq)]
273pub struct FootnoteDef<'a> {
274    /// Footnote label (e.g., "1", "note").
275    pub label: CowStr<'a>,
276    /// Footnote content blocks.
277    pub blocks: Vec<Block<'a>>,
278    /// Source span.
279    pub span: Span,
280}
281
282/// Mathematical equation block (LaTeX).
283#[derive(Debug, Clone, PartialEq)]
284pub struct MathBlock<'a> {
285    /// Whether this is display math (vs inline).
286    pub display: bool,
287    /// LaTeX content.
288    pub content: CowStr<'a>,
289    /// Source span.
290    pub span: Span,
291}
292
293/// Raw HTML block content.
294#[derive(Debug, Clone, PartialEq)]
295pub struct HtmlBlock<'a> {
296    /// Raw HTML content.
297    pub content: CowStr<'a>,
298    /// Source span.
299    pub span: Span,
300}
301
302/// Unparsed block content (for error recovery).
303#[derive(Debug, Clone, PartialEq)]
304pub struct RawBlock<'a> {
305    /// Raw unparsed content.
306    pub content: CowStr<'a>,
307    /// Source span.
308    pub span: Span,
309}
310
311/// Inline-level AST nodes (within paragraphs, headings, etc.).
312///
313/// Inline elements represent text-level formatting and can be nested.
314#[derive(Debug, Clone, PartialEq)]
315pub enum Inline<'a> {
316    /// Plain text content.
317    Text(Text<'a>),
318    /// Emphasized text (*italic*).
319    Emphasis(Emphasis<'a>),
320    /// Strong text (**bold**).
321    Strong(Strong<'a>),
322    /// Inline code (`code`).
323    CodeSpan(CodeSpan<'a>),
324    /// Hyperlink with label and URL.
325    Link(Link<'a>),
326    /// Auto-detected URL link.
327    AutoLink(AutoLink<'a>),
328    /// Strikethrough text (~~deleted~~).
329    Strikethrough(Strikethrough<'a>),
330    /// Footnote reference ([^label]).
331    FootnoteRef(FootnoteRef<'a>),
332    /// Hard line break (explicit).
333    HardBreak(Span),
334    /// Soft line break (newline in source).
335    SoftBreak(Span),
336}
337
338/// Plain text content.
339#[derive(Debug, Clone, PartialEq)]
340pub struct Text<'a> {
341    /// The text content.
342    pub content: CowStr<'a>,
343    /// Source span.
344    pub span: Span,
345}
346
347/// Emphasized (italic) text.
348#[derive(Debug, Clone, PartialEq)]
349pub struct Emphasis<'a> {
350    /// Nested inline content.
351    pub content: Vec<Inline<'a>>,
352    /// Source span.
353    pub span: Span,
354}
355
356/// Strong (bold) text.
357#[derive(Debug, Clone, PartialEq)]
358pub struct Strong<'a> {
359    /// Nested inline content.
360    pub content: Vec<Inline<'a>>,
361    /// Source span.
362    pub span: Span,
363}
364
365/// Strikethrough text.
366#[derive(Debug, Clone, PartialEq)]
367pub struct Strikethrough<'a> {
368    /// Nested inline content.
369    pub content: Vec<Inline<'a>>,
370    /// Source span.
371    pub span: Span,
372}
373
374/// Inline code span.
375#[derive(Debug, Clone, PartialEq)]
376pub struct CodeSpan<'a> {
377    /// Code content (not parsed for formatting).
378    pub content: CowStr<'a>,
379    /// Source span.
380    pub span: Span,
381}
382
383/// Hyperlink with label and destination.
384#[derive(Debug, Clone, PartialEq)]
385pub struct Link<'a> {
386    /// Link text (may contain nested formatting).
387    pub label: Vec<Inline<'a>>,
388    /// Link destination URL.
389    pub url: CowStr<'a>,
390    /// Optional title (for tooltips).
391    pub title: Option<CowStr<'a>>,
392    /// Source span.
393    pub span: Span,
394}
395
396/// Automatically detected URL.
397#[derive(Debug, Clone, PartialEq)]
398pub struct AutoLink<'a> {
399    /// The URL.
400    pub url: CowStr<'a>,
401    /// Source span.
402    pub span: Span,
403}
404
405/// Reference to a footnote.
406#[derive(Debug, Clone, PartialEq)]
407pub struct FootnoteRef<'a> {
408    /// Footnote label being referenced.
409    pub label: CowStr<'a>,
410    /// Source span.
411    pub span: Span,
412}
413
/// Borrowed or owned string type for zero-copy parsing.
///
/// An alias for `Cow<'a, str>`: the `Borrowed` variant holds a `&'a str`,
/// the `Owned` variant holds a `String`.
pub type CowStr<'a> = std::borrow::Cow<'a, str>;