// litedoc_core/ast.rs
//! Abstract Syntax Tree types for LiteDoc documents.
//!
//! This module contains all the AST node types produced by the parser.
//! The AST is designed to be:
//!
//! - **Zero-copy**: Uses `Cow<'a, str>` to borrow from input when possible
//! - **Span-tracked**: Every node includes source location information
//! - **Comprehensive**: Supports all LiteDoc and Markdown constructs

use crate::span::Span;

/// Parsing profile that determines syntax rules.
///
/// The profile affects how the parser interprets certain constructs
/// and which features are enabled by default.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Profile {
    /// Full LiteDoc syntax with explicit fencing.
    ///
    /// This is the native format optimized for AI consumption.
    Litedoc,
    /// CommonMark with GFM extensions (tables, strikethrough, autolinks).
    Md,
    /// Strict CommonMark compliance only.
    MdStrict,
}

/// Optional modules that extend parser capabilities.
///
/// Modules can be enabled via the `@modules` directive or parser configuration.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Module {
    /// GFM-style tables with `|` delimiters.
    Tables,
    /// Footnote definitions and references.
    Footnotes,
    /// LaTeX-style math blocks.
    Math,
    /// Task list items with `[ ]` and `[x]` checkboxes.
    Tasks,
    /// `~~strikethrough~~` syntax.
    Strikethrough,
    /// Automatic URL detection and linking.
    Autolink,
    /// Raw HTML pass-through blocks.
    Html,
}

49/// A parsed LiteDoc document.
50///
51/// The document is the root of the AST and contains all parsed content.
52/// It preserves the parsing profile, enabled modules, optional metadata,
53/// and all content blocks.
54#[derive(Debug, Clone, PartialEq)]
55pub struct Document<'a> {
56 /// The parsing profile used (may differ from parser default if `@profile` directive present).
57 pub profile: Profile,
58 /// Enabled modules from `@modules` directive.
59 pub modules: Vec<Module>,
60 /// Optional metadata from `--- meta` block.
61 pub metadata: Option<Metadata<'a>>,
62 /// Content blocks in document order.
63 pub blocks: Vec<Block<'a>>,
64 /// Source span covering the entire document.
65 pub span: Span,
66}
67
68/// Document metadata from the `--- meta` block.
69///
70/// Metadata provides key-value pairs for document properties like
71/// title, author, date, tags, etc.
72#[derive(Debug, Clone, PartialEq)]
73pub struct Metadata<'a> {
74 /// Key-value entries in declaration order.
75 pub entries: Vec<(CowStr<'a>, AttrValue<'a>)>,
76 /// Source span of the metadata block.
77 pub span: Span,
78}
79
80/// Typed attribute values for metadata entries.
81///
82/// Values are automatically parsed into appropriate types:
83/// - Quoted strings → `Str`
84/// - `true`/`false` → `Bool`
85/// - Integers → `Int`
86/// - Decimals → `Float`
87/// - `[a, b, c]` → `List`
88#[derive(Debug, Clone, PartialEq)]
89pub enum AttrValue<'a> {
90 /// String value (quotes stripped).
91 Str(CowStr<'a>),
92 /// Boolean value.
93 Bool(bool),
94 /// 64-bit signed integer.
95 Int(i64),
96 /// 64-bit floating point.
97 Float(f64),
98 /// Nested list of values.
99 List(Vec<AttrValue<'a>>),
100}
101
102/// Block-level AST nodes.
103///
104/// Blocks are the primary structural elements of a document.
105/// Each variant represents a distinct block type with its own structure.
106#[derive(Debug, Clone, PartialEq)]
107pub enum Block<'a> {
108 /// Section heading (levels 1-6).
109 Heading(Heading<'a>),
110 /// Text paragraph with inline formatting.
111 Paragraph(Paragraph<'a>),
112 /// Ordered or unordered list.
113 List(List<'a>),
114 /// Fenced code block with optional language.
115 CodeBlock(CodeBlock<'a>),
116 /// Callout/admonition block (note, warning, etc.).
117 Callout(Callout<'a>),
118 /// Block quotation.
119 Quote(Quote<'a>),
120 /// Figure with image and caption.
121 Figure(Figure<'a>),
122 /// Data table with rows and cells.
123 Table(Table<'a>),
124 /// Footnote definitions.
125 Footnotes(Footnotes<'a>),
126 /// Mathematical equation (inline or display).
127 Math(MathBlock<'a>),
128 /// Horizontal rule / thematic break.
129 ThematicBreak(Span),
130 /// Raw HTML content (when HTML module enabled).
131 Html(HtmlBlock<'a>),
132 /// Unparsed/unknown block content (error recovery).
133 Raw(RawBlock<'a>),
134}
135
136/// Section heading with level and inline content.
137#[derive(Debug, Clone, PartialEq)]
138pub struct Heading<'a> {
139 /// Heading level (1-6).
140 pub level: u8,
141 /// Inline content (may include formatting).
142 pub content: Vec<Inline<'a>>,
143 /// Source span.
144 pub span: Span,
145}
146
147/// Text paragraph containing inline elements.
148#[derive(Debug, Clone, PartialEq)]
149pub struct Paragraph<'a> {
150 /// Inline content with formatting.
151 pub content: Vec<Inline<'a>>,
152 /// Source span.
153 pub span: Span,
154}
155
/// List ordering style.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ListKind {
    /// Numbered list (1. 2. 3.).
    Ordered,
    /// Bulleted list (- or *).
    Unordered,
}

165/// A list block containing multiple items.
166#[derive(Debug, Clone, PartialEq)]
167pub struct List<'a> {
168 /// Ordered or unordered.
169 pub kind: ListKind,
170 /// Starting number for ordered lists.
171 pub start: Option<u64>,
172 /// List items.
173 pub items: Vec<ListItem<'a>>,
174 /// Source span.
175 pub span: Span,
176}
177
178/// A single list item (may contain nested blocks).
179#[derive(Debug, Clone, PartialEq)]
180pub struct ListItem<'a> {
181 /// Content blocks within the item.
182 pub blocks: Vec<Block<'a>>,
183 /// Source span.
184 pub span: Span,
185}
186
187/// Fenced code block with syntax highlighting hint.
188#[derive(Debug, Clone, PartialEq)]
189pub struct CodeBlock<'a> {
190 /// Language identifier (e.g., "rust", "python").
191 pub lang: CowStr<'a>,
192 /// Raw code content.
193 pub content: CowStr<'a>,
194 /// Source span.
195 pub span: Span,
196}
197
198/// Callout/admonition block for notes, warnings, etc.
199#[derive(Debug, Clone, PartialEq)]
200pub struct Callout<'a> {
201 /// Callout type (note, warning, info, tip, etc.).
202 pub kind: CowStr<'a>,
203 /// Optional title override.
204 pub title: Option<CowStr<'a>>,
205 /// Content blocks.
206 pub blocks: Vec<Block<'a>>,
207 /// Source span.
208 pub span: Span,
209}
210
211/// Block quotation.
212#[derive(Debug, Clone, PartialEq)]
213pub struct Quote<'a> {
214 /// Quoted content blocks.
215 pub blocks: Vec<Block<'a>>,
216 /// Source span.
217 pub span: Span,
218}
219
220/// Figure with image and optional caption.
221#[derive(Debug, Clone, PartialEq)]
222pub struct Figure<'a> {
223 /// Image source URL or path.
224 pub src: CowStr<'a>,
225 /// Alt text for accessibility.
226 pub alt: CowStr<'a>,
227 /// Optional figure caption.
228 pub caption: Option<CowStr<'a>>,
229 /// Source span.
230 pub span: Span,
231}
232
233/// Data table with header and body rows.
234#[derive(Debug, Clone, PartialEq)]
235pub struct Table<'a> {
236 /// All table rows (first may be header).
237 pub rows: Vec<TableRow<'a>>,
238 /// Source span.
239 pub span: Span,
240}
241
242/// A single table row.
243#[derive(Debug, Clone, PartialEq)]
244pub struct TableRow<'a> {
245 /// Cells in this row.
246 pub cells: Vec<TableCell<'a>>,
247 /// Whether this is a header row.
248 pub header: bool,
249 /// Source span.
250 pub span: Span,
251}
252
253/// A single table cell.
254#[derive(Debug, Clone, PartialEq)]
255pub struct TableCell<'a> {
256 /// Cell content (inline elements).
257 pub content: Vec<Inline<'a>>,
258 /// Source span.
259 pub span: Span,
260}
261
262/// Container for footnote definitions.
263#[derive(Debug, Clone, PartialEq)]
264pub struct Footnotes<'a> {
265 /// Footnote definitions.
266 pub defs: Vec<FootnoteDef<'a>>,
267 /// Source span.
268 pub span: Span,
269}
270
271/// A single footnote definition.
272#[derive(Debug, Clone, PartialEq)]
273pub struct FootnoteDef<'a> {
274 /// Footnote label (e.g., "1", "note").
275 pub label: CowStr<'a>,
276 /// Footnote content blocks.
277 pub blocks: Vec<Block<'a>>,
278 /// Source span.
279 pub span: Span,
280}
281
282/// Mathematical equation block (LaTeX).
283#[derive(Debug, Clone, PartialEq)]
284pub struct MathBlock<'a> {
285 /// Whether this is display math (vs inline).
286 pub display: bool,
287 /// LaTeX content.
288 pub content: CowStr<'a>,
289 /// Source span.
290 pub span: Span,
291}
292
293/// Raw HTML block content.
294#[derive(Debug, Clone, PartialEq)]
295pub struct HtmlBlock<'a> {
296 /// Raw HTML content.
297 pub content: CowStr<'a>,
298 /// Source span.
299 pub span: Span,
300}
301
302/// Unparsed block content (for error recovery).
303#[derive(Debug, Clone, PartialEq)]
304pub struct RawBlock<'a> {
305 /// Raw unparsed content.
306 pub content: CowStr<'a>,
307 /// Source span.
308 pub span: Span,
309}
310
311/// Inline-level AST nodes (within paragraphs, headings, etc.).
312///
313/// Inline elements represent text-level formatting and can be nested.
314#[derive(Debug, Clone, PartialEq)]
315pub enum Inline<'a> {
316 /// Plain text content.
317 Text(Text<'a>),
318 /// Emphasized text (*italic*).
319 Emphasis(Emphasis<'a>),
320 /// Strong text (**bold**).
321 Strong(Strong<'a>),
322 /// Inline code (`code`).
323 CodeSpan(CodeSpan<'a>),
324 /// Hyperlink with label and URL.
325 Link(Link<'a>),
326 /// Auto-detected URL link.
327 AutoLink(AutoLink<'a>),
328 /// Strikethrough text (~~deleted~~).
329 Strikethrough(Strikethrough<'a>),
330 /// Footnote reference ([^label]).
331 FootnoteRef(FootnoteRef<'a>),
332 /// Hard line break (explicit).
333 HardBreak(Span),
334 /// Soft line break (newline in source).
335 SoftBreak(Span),
336}
337
338/// Plain text content.
339#[derive(Debug, Clone, PartialEq)]
340pub struct Text<'a> {
341 /// The text content.
342 pub content: CowStr<'a>,
343 /// Source span.
344 pub span: Span,
345}
346
347/// Emphasized (italic) text.
348#[derive(Debug, Clone, PartialEq)]
349pub struct Emphasis<'a> {
350 /// Nested inline content.
351 pub content: Vec<Inline<'a>>,
352 /// Source span.
353 pub span: Span,
354}
355
356/// Strong (bold) text.
357#[derive(Debug, Clone, PartialEq)]
358pub struct Strong<'a> {
359 /// Nested inline content.
360 pub content: Vec<Inline<'a>>,
361 /// Source span.
362 pub span: Span,
363}
364
365/// Strikethrough text.
366#[derive(Debug, Clone, PartialEq)]
367pub struct Strikethrough<'a> {
368 /// Nested inline content.
369 pub content: Vec<Inline<'a>>,
370 /// Source span.
371 pub span: Span,
372}
373
374/// Inline code span.
375#[derive(Debug, Clone, PartialEq)]
376pub struct CodeSpan<'a> {
377 /// Code content (not parsed for formatting).
378 pub content: CowStr<'a>,
379 /// Source span.
380 pub span: Span,
381}
382
383/// Hyperlink with label and destination.
384#[derive(Debug, Clone, PartialEq)]
385pub struct Link<'a> {
386 /// Link text (may contain nested formatting).
387 pub label: Vec<Inline<'a>>,
388 /// Link destination URL.
389 pub url: CowStr<'a>,
390 /// Optional title (for tooltips).
391 pub title: Option<CowStr<'a>>,
392 /// Source span.
393 pub span: Span,
394}
395
396/// Automatically detected URL.
397#[derive(Debug, Clone, PartialEq)]
398pub struct AutoLink<'a> {
399 /// The URL.
400 pub url: CowStr<'a>,
401 /// Source span.
402 pub span: Span,
403}
404
405/// Reference to a footnote.
406#[derive(Debug, Clone, PartialEq)]
407pub struct FootnoteRef<'a> {
408 /// Footnote label being referenced.
409 pub label: CowStr<'a>,
410 /// Source span.
411 pub span: Span,
412}
413
/// Borrowed or owned string type for zero-copy parsing.
///
/// An alias for `Cow<'a, str>`: the `Borrowed` variant holds a `&'a str`
/// and the `Owned` variant holds a `String`.
pub type CowStr<'a> = std::borrow::Cow<'a, str>;