quillmark_core/document/mod.rs
1//! # Document Module
2//!
3//! Parsing functionality for markdown documents with YAML frontmatter.
4//!
5//! ## Overview
6//!
7//! The `document` module provides the [`Document::from_markdown`] function for parsing
8//! markdown documents into a typed in-memory model.
9//!
10//! ## Key Types
11//!
12//! - [`Document`]: Typed in-memory Quillmark document — `main` leaf plus composable leaves.
13//! - [`Leaf`]: A single metadata fence block, main or composable, with a sentinel,
14//! typed frontmatter, and a body.
15//! - [`Sentinel`]: Discriminates `QUILL:` main leaves from `KIND:` composable leaves.
16//! - [`Frontmatter`]: Ordered list of items (fields + comments) parsed from a YAML fence.
17//!
18//! ## Examples
19//!
20//! ### Basic Parsing
21//!
22//! ```
23//! use quillmark_core::Document;
24//!
25//! let markdown = r#"---
26//! QUILL: my_quill
27//! title: My Document
28//! author: John Doe
29//! ---
30//!
31//! # Introduction
32//!
33//! Document content here.
34//! "#;
35//!
36//! let doc = Document::from_markdown(markdown).unwrap();
37//! let title = doc.main()
38//! .frontmatter()
39//! .get("title")
40//! .and_then(|v| v.as_str())
41//! .unwrap_or("Untitled");
42//! assert_eq!(title, "My Document");
43//! assert_eq!(doc.leaves().len(), 0);
44//! ```
45//!
46//! ### Document with leaves
47//!
48//! ```
49//! use quillmark_core::Document;
50//!
51//! let markdown = "---\nQUILL: my_quill\ntitle: Catalog\n---\n\nIntro.\n\n```leaf\nKIND: product\nname: Widget\n```\n";
52//! let doc = Document::from_markdown(markdown).unwrap();
53//! assert_eq!(doc.leaves().len(), 1);
54//! assert_eq!(doc.leaves()[0].tag(), "product");
55//! ```
56//!
57//! ### Accessing the plate wire format
58//!
59//! ```
60//! use quillmark_core::Document;
61//!
62//! let doc = Document::from_markdown(
63//! "---\nQUILL: my_quill\ntitle: Hi\n---\n\nBody here.\n"
64//! ).unwrap();
65//! let json = doc.to_plate_json();
66//! assert_eq!(json["QUILL"], "my_quill");
67//! assert_eq!(json["title"], "Hi");
68//! assert_eq!(json["BODY"], "\nBody here.\n");
69//! assert!(json["LEAVES"].is_array());
70//! ```
71//!
72//! ## Error Handling
73//!
74//! [`Document::from_markdown`] returns errors for:
75//! - Malformed YAML syntax
76//! - Unclosed frontmatter blocks
77//! - Multiple global frontmatter blocks
78//! - Both QUILL and KIND specified in the same block
79//! - Reserved field name usage
80//! - Name collisions
81//!
82//! See [PARSE.md](https://github.com/nibsbin/quillmark/blob/main/designs/PARSE.md) for
83//! comprehensive documentation of the Extended YAML Metadata Standard.
84
85use crate::error::ParseError;
86use crate::version::QuillReference;
87use crate::Diagnostic;
88
89pub mod assemble;
90pub mod edit;
91pub mod emit;
92pub mod fences;
93pub mod frontmatter;
94pub mod limits;
95pub mod prescan;
96pub mod sentinel;
97
98pub use edit::EditError;
99pub use frontmatter::{Frontmatter, FrontmatterItem};
100
// Note: the `Sentinel` type is defined below in this module file; it is
// re-exported at the crate root via `lib.rs`.
103
104#[cfg(test)]
105mod tests;
106
107/// Parse result carrying both the parsed document and any non-fatal warnings
108/// (e.g. near-miss sentinel lints emitted per spec §4.2).
109#[derive(Debug)]
110pub struct ParseOutput {
111 /// The successfully parsed document.
112 pub document: Document,
113 /// Non-fatal warnings collected during parsing.
114 pub warnings: Vec<Diagnostic>,
115}
116
117/// Discriminator for a [`Leaf`]'s metadata fence.
118///
119/// The first fence in a Quillmark document carries `QUILL: <ref>` and is the
120/// document-level *main* leaf; every subsequent fence carries `KIND: <tag>`
121/// and is a composable leaf. `Sentinel` captures that distinction in the typed
122/// model so every fence is one uniform shape.
123#[derive(Debug, Clone, PartialEq)]
124pub enum Sentinel {
125 /// `QUILL: <ref>` — the document entry leaf.
126 Main(QuillReference),
127 /// `KIND: <tag>` — a composable leaf with the given tag.
128 Leaf(String),
129}
130
131impl Sentinel {
132 /// String form of this sentinel's value: the quill reference for `Main`,
133 /// the tag for `Leaf`.
134 pub fn as_str(&self) -> String {
135 match self {
136 Sentinel::Main(r) => r.to_string(),
137 Sentinel::Leaf(t) => t.clone(),
138 }
139 }
140
141 /// Returns `true` if this is a `Main` sentinel.
142 pub fn is_main(&self) -> bool {
143 matches!(self, Sentinel::Main(_))
144 }
145}
146
147/// A single metadata fence parsed from a Quillmark Markdown document.
148///
149/// A `Leaf` is the uniform shape for both the document entry (main) fence and
150/// composable leaf fences. `sentinel` distinguishes the two.
151///
152/// Every leaf has:
153/// - `sentinel` — the `QUILL` reference (for main) or `KIND` tag (for composable).
154/// - `frontmatter` — ordered items parsed from the YAML fence body (with the
155/// sentinel key already removed).
156/// - `body` — the Markdown text that follows the closing fence, up to the next
157/// fence (or EOF).
158///
159/// ## Leaf body absence
160///
161/// If a leaf block has no trailing Markdown content (e.g. the next block or
162/// EOF immediately follows the closing fence), `body` is the empty string `""`.
163/// It is never `None`; callers that need to distinguish "absent" from "empty"
164/// should check `leaf.body().is_empty()`.
165#[derive(Debug, Clone, PartialEq)]
166pub struct Leaf {
167 sentinel: Sentinel,
168 frontmatter: Frontmatter,
169 body: String,
170}
171
172impl Leaf {
173 /// Create a `Leaf` directly from a sentinel, a typed frontmatter, and a
174 /// body. Does **not** validate the sentinel tag or any field names —
175 /// callers are responsible for providing already-valid data. For
176 /// user-facing construction of composable leaves use [`Leaf::new`]
177 /// (defined in `edit.rs`).
178 pub fn new_with_sentinel(sentinel: Sentinel, frontmatter: Frontmatter, body: String) -> Self {
179 Self {
180 sentinel,
181 frontmatter,
182 body,
183 }
184 }
185
186 /// The sentinel discriminating this leaf as main or composable.
187 pub fn sentinel(&self) -> &Sentinel {
188 &self.sentinel
189 }
190
191 /// The leaf tag — the `KIND:` value for composable leaves, or the string
192 /// form of the quill reference for main leaves.
193 pub fn tag(&self) -> String {
194 self.sentinel.as_str()
195 }
196
197 /// Typed frontmatter (map-keyed view and ordered item list).
198 pub fn frontmatter(&self) -> &Frontmatter {
199 &self.frontmatter
200 }
201
202 /// Mutable access to the frontmatter.
203 pub fn frontmatter_mut(&mut self) -> &mut Frontmatter {
204 &mut self.frontmatter
205 }
206
207 /// Markdown body that follows this leaf's closing fence.
208 ///
209 /// Empty string when no trailing content is present.
210 pub fn body(&self) -> &str {
211 &self.body
212 }
213
214 /// Returns `true` if this is the document entry (main) leaf.
215 pub fn is_main(&self) -> bool {
216 self.sentinel.is_main()
217 }
218
219 /// Replace this leaf's sentinel. Internal helper; public mutators
220 /// ([`Document::set_quill_ref`], the parser) call this.
221 pub(crate) fn replace_sentinel(&mut self, sentinel: Sentinel) {
222 self.sentinel = sentinel;
223 }
224
225 /// Overwrite the body string. Internal helper used by [`Leaf::replace_body`].
226 pub(crate) fn overwrite_body(&mut self, body: String) {
227 self.body = body;
228 }
229}
230
231/// A fully-parsed, typed in-memory Quillmark document.
232///
233/// `Document` is the canonical representation of a Quillmark Markdown file.
234/// Markdown is both the import and export format; the structured data here
235/// is primary.
236///
237/// ## Structure
238///
239/// - `main` — the entry `Leaf` (sentinel is `Sentinel::Main(reference)`).
240/// - `leaves` — ordered composable leaves (each with `Sentinel::Leaf(tag)`).
241///
242/// Backend plates consume the flat JSON wire shape produced by
243/// [`Document::to_plate_json`]. That method is the **only** place in core
244/// that reconstructs `{"QUILL": ..., "LEAVES": [...], "BODY": "..."}`.
245#[derive(Debug, Clone)]
246pub struct Document {
247 main: Leaf,
248 leaves: Vec<Leaf>,
249 warnings: Vec<Diagnostic>,
250}
251
252// Equality is defined over the structural content only — `warnings` are
253// parse-time observations that depend on what the source text happened to
254// contain (near-miss sentinels, unsupported tag drops, etc.) and so differ
255// between a source document and its round-tripped emission. Two documents
256// are equal when their `main` and `leaves` match.
257impl PartialEq for Document {
258 fn eq(&self, other: &Self) -> bool {
259 self.main == other.main && self.leaves == other.leaves
260 }
261}
262
263impl Document {
264 /// Create a `Document` from a pre-built main `Leaf` and composable leaves.
265 ///
266 /// The caller must guarantee that `main.sentinel` is `Sentinel::Main(_)`
267 /// and every leaf in `leaves` has `sentinel` = `Sentinel::Leaf(_)`.
268 pub fn from_main_and_leaves(main: Leaf, leaves: Vec<Leaf>, warnings: Vec<Diagnostic>) -> Self {
269 debug_assert!(main.sentinel.is_main(), "main leaf must be Sentinel::Main");
270 debug_assert!(
271 leaves.iter().all(|c| !c.sentinel.is_main()),
272 "composable leaves must be Sentinel::Leaf"
273 );
274 Self {
275 main,
276 leaves,
277 warnings,
278 }
279 }
280
281 /// Parse a Quillmark Markdown document, discarding any non-fatal warnings.
282 pub fn from_markdown(markdown: &str) -> Result<Self, ParseError> {
283 assemble::decompose(markdown)
284 }
285
286 /// Parse a Quillmark Markdown document, returning warnings alongside the document.
287 pub fn from_markdown_with_warnings(markdown: &str) -> Result<ParseOutput, ParseError> {
288 assemble::decompose_with_warnings(markdown)
289 .map(|(document, warnings)| ParseOutput { document, warnings })
290 }
291
292 // ── Accessors ──────────────────────────────────────────────────────────────
293
294 /// The document's main (entry) leaf.
295 pub fn main(&self) -> &Leaf {
296 &self.main
297 }
298
299 /// Mutable access to the main leaf.
300 pub fn main_mut(&mut self) -> &mut Leaf {
301 &mut self.main
302 }
303
304 /// The quill reference (`name@version-selector`) carried by the main leaf's
305 /// sentinel. Convenience reader over `doc.main().sentinel()`.
306 pub fn quill_reference(&self) -> &QuillReference {
307 match &self.main.sentinel {
308 Sentinel::Main(r) => r,
309 Sentinel::Leaf(_) => {
310 unreachable!("main leaf must carry Sentinel::Main by construction")
311 }
312 }
313 }
314
315 /// Ordered list of composable leaf blocks.
316 pub fn leaves(&self) -> &[Leaf] {
317 &self.leaves
318 }
319
320 /// Mutable access to the composable leaves slice.
321 pub fn leaves_mut(&mut self) -> &mut [Leaf] {
322 &mut self.leaves
323 }
324
325 /// Internal mutable access to the backing `Vec<Leaf>`. Used by edit
326 /// operations ([`Document::push_leaf`], etc.) that need to insert or
327 /// remove elements.
328 pub(crate) fn leaves_vec_mut(&mut self) -> &mut Vec<Leaf> {
329 &mut self.leaves
330 }
331
332 /// Non-fatal warnings collected during parsing.
333 pub fn warnings(&self) -> &[Diagnostic] {
334 &self.warnings
335 }
336
337 // ── Wire format ────────────────────────────────────────────────────────────
338
339 /// Serialize this document to the JSON shape expected by backend plates.
340 ///
341 /// The output has the following top-level keys, which match what
342 /// `lib.typ.template` reads at Typst runtime:
343 ///
344 /// ```json
345 /// {
346 /// "QUILL": "<ref>",
347 /// "<field>": <value>,
348 /// ...
349 /// "BODY": "<global-body>",
350 /// "LEAVES": [
351 /// { "KIND": "<tag>", "<field>": <value>, ..., "BODY": "<leaf-body>" },
352 /// ...
353 /// ]
354 /// }
355 /// ```
356 ///
357 /// This is the **only** place in `quillmark-core` that knows about the plate
358 /// wire format. All internal consumers (Quill, backends) call this instead
359 /// of constructing the shape by hand.
360 pub fn to_plate_json(&self) -> serde_json::Value {
361 let mut map = serde_json::Map::new();
362
363 // QUILL first — plate authors expect this at the top.
364 map.insert(
365 "QUILL".to_string(),
366 serde_json::Value::String(self.quill_reference().to_string()),
367 );
368
369 // Frontmatter fields in insertion order.
370 for (key, value) in self.main.frontmatter.iter() {
371 map.insert(key.clone(), value.as_json().clone());
372 }
373
374 // Global body.
375 map.insert(
376 "BODY".to_string(),
377 serde_json::Value::String(self.main.body.clone()),
378 );
379
380 // Leaves array.
381 let leaves_array: Vec<serde_json::Value> = self
382 .leaves
383 .iter()
384 .map(|leaf| {
385 let mut leaf_map = serde_json::Map::new();
386 leaf_map.insert("KIND".to_string(), serde_json::Value::String(leaf.tag()));
387 for (key, value) in leaf.frontmatter.iter() {
388 leaf_map.insert(key.clone(), value.as_json().clone());
389 }
390 leaf_map.insert(
391 "BODY".to_string(),
392 serde_json::Value::String(leaf.body.clone()),
393 );
394 serde_json::Value::Object(leaf_map)
395 })
396 .collect();
397
398 map.insert("LEAVES".to_string(), serde_json::Value::Array(leaves_array));
399
400 serde_json::Value::Object(map)
401 }
402}