Skip to main content

quillmark_core/document/
emit.rs

1//! Canonical Markdown emission for [`Document`].
2//!
3//! This module implements [`Document::to_markdown`], which converts a typed
4//! in-memory `Document` back into canonical Quillmark Markdown.
5//!
6//! ## YAML emission strategy
7//!
8//! `serde-saphyr::SerializerOptions::quote_all` was evaluated (spike, 2026-04-21)
9//! and found to emit single-quoted strings for ordinary scalars like `"on"` and
10//! `"01234"` — switching to double quotes only when the string contains a single
11//! quote, backslash, or control character.  That behaviour is correct for
12//! round-trip type-fidelity (single-quoted YAML strings are re-parsed as strings),
13//! but the Quillmark spec (§5.2) requires **always double-quoted, JSON-style
14//! escaping**.  Because `SerializerOptions` provides no "force double-quote" knob,
15//! the YAML value block is generated by a hand-written emitter in this module.
16//!
17//! The hand-written emitter is small (< 120 lines), covers exactly the
18//! `QuillValue` type variants, and gives complete control over quoting style and
19//! indentation without pulling in additional abstractions.
20
21use serde_json::Value as JsonValue;
22
23use super::frontmatter::FrontmatterItem;
24use super::prescan::{CommentPathSegment, NestedComment};
25use super::{Card, Document, Sentinel};
26
27// ── Public entry point ────────────────────────────────────────────────────────
28
29impl Document {
30    /// Emit canonical Quillmark Markdown from this document.
31    ///
32    /// # Contract
33    ///
34    /// 1. **Type-fidelity round-trip.** `Document::from_markdown(&doc.to_markdown())`
35    ///    returns a `Document` equal to `doc` by value *and* by type variant.
36    ///    `QuillValue::String("on")` round-trips as a string, never as a bool.
37    ///    `QuillValue::String("01234")` round-trips as a string, never as an
38    ///    integer.  This guarantee is the whole point of owning emission.
39    ///
40    /// 2. **Emit-idempotent.** `to_markdown` is a pure function of `doc`; two
41    ///    calls on the same `doc` return byte-equal strings.
42    ///
43    /// Byte-equality with the *original source* is **not** guaranteed.
44    ///
45    /// # Emission rules (§5.2)
46    ///
47    /// - Line endings: `\n` only.  CRLF normalization happens on import.
48    /// - Frontmatter: `---\n`, `QUILL: <ref>` first, remaining fields in
49    ///   `IndexMap` insertion order, `---\n`, blank line.
50    /// - Cards: one blank line before each, fence `---\nCARD: <tag>\n<fields>\n---\n<body>`.
51    /// - Body: emitted verbatim after frontmatter (and cards).
52    /// - Mappings and sequences: **block style** at every nesting level.
53    /// - Booleans: `true` / `false`.
54    /// - Null: `null`.
55    /// - Numbers: bare literals (integer or float as stored in `serde_json::Value`).
56    /// - **Strings: always double-quoted**, JSON-style escaping
57    ///   (`\"`, `\\`, `\n`, `\t`, `\uXXXX` for control chars).  This is the
58    ///   load-bearing rule that guarantees type fidelity.
59    /// - Multi-line strings: double-quoted with `\n` escape sequences.  No block
60    ///   scalars (`|`, `>`) in v1.
61    ///
62    /// # Open decisions (resolved)
63    ///
64    /// - **Nested-map order.** `QuillValue` is backed by `serde_json::Value`
65    ///   whose object type (`serde_json::Map`) preserves insertion order when the
66    ///   `serde_json/preserve_order` feature is enabled (it is in this workspace).
67    ///   Insertion order is therefore preserved for nested maps at emit time.
68    ///
69    /// - **Empty containers.**
70    ///   - Empty object (`{}`) → the key is **omitted** from emit entirely.
71    ///   - Empty array (`[]`) → emitted as `key: []\n`.
72    ///
73    /// # What is lost
74    ///
75    /// - **YAML comments**: stripped during parsing; not stored in `Document`.
76    /// - **Custom tags** (`!fill`): the tag is dropped; the scalar value is
77    ///   preserved.  On re-emit the tag does not appear.
78    /// - **Original quoting style**: all strings are re-emitted double-quoted
79    ///   regardless of how they were written in the source.
80    pub fn to_markdown(&self) -> String {
81        let mut out = String::new();
82
83        // ── Main card (first fence + global body) ─────────────────────────────
84        emit_card_fence(&mut out, self.main());
85        out.push_str(self.main().body());
86
87        // ── Composable cards ──────────────────────────────────────────────────
88        // `emit_card` normalises the separator before each fence, so edited
89        // bodies (which may lack a trailing blank line) still round-trip.
90        for card in self.cards() {
91            ensure_f2_before_fence(&mut out);
92            emit_card_fence(&mut out, card);
93            if !card.body().is_empty() {
94                out.push_str(card.body());
95            }
96        }
97
98        out
99    }
100}
101
102// ── Card emission ─────────────────────────────────────────────────────────────
103
104/// Emit a card's metadata fence (between `---\n` markers), including the
105/// sentinel line and every frontmatter item.
106fn emit_card_fence(out: &mut String, card: &Card) {
107    out.push_str("---\n");
108
109    // Sentinel line.
110    match card.sentinel() {
111        Sentinel::Main(r) => {
112            out.push_str("QUILL: ");
113            out.push_str(&r.to_string());
114            out.push('\n');
115        }
116        Sentinel::Card(tag) => {
117            out.push_str("CARD: ");
118            out.push_str(tag);
119            out.push('\n');
120        }
121    }
122
123    // Frontmatter items in order.
124    let nested = card.frontmatter().nested_comments();
125    for item in card.frontmatter().items() {
126        match item {
127            FrontmatterItem::Field { key, value, fill } => {
128                let path = vec![CommentPathSegment::Key(key.clone())];
129                emit_field(out, key, value.as_json(), 0, *fill, &path, nested);
130            }
131            FrontmatterItem::Comment { text } => {
132                out.push_str("# ");
133                out.push_str(text);
134                out.push('\n');
135            }
136        }
137    }
138
139    out.push_str("---\n");
140}
141
/// Append the blank-line separator that must precede the next metadata
/// fence (the F2 precondition).
///
/// Behaviour by current state of `out`:
///
/// - empty: nothing is appended (an empty buffer already satisfies F2 via
///   its "line 1" clause, MARKDOWN.md §3 F2);
/// - ends with `\n`: exactly one more `\n` is appended, forming the blank
///   line — this also applies when the text already ends in one or more
///   blank lines, which are kept as authored;
/// - anything else: `\n\n` is appended, i.e. a terminator for the last
///   content line followed by the blank line.
fn ensure_f2_before_fence(out: &mut String) {
    if out.is_empty() {
        return;
    }
    let separator = if out.ends_with('\n') { "\n" } else { "\n\n" };
    out.push_str(separator);
}
163
164// ── YAML value emission ───────────────────────────────────────────────────────
165
166/// Emit comments captured at `path` whose `position` matches `position`,
167/// each as a `# text` line indented by `indent` spaces.
168fn emit_pending_comments(
169    out: &mut String,
170    path: &[CommentPathSegment],
171    position: usize,
172    indent: usize,
173    nested: &[NestedComment],
174) {
175    for c in nested {
176        if c.position == position && c.container_path.as_slice() == path {
177            push_indent(out, indent);
178            out.push_str("# ");
179            out.push_str(&c.text);
180            out.push('\n');
181        }
182    }
183}
184
185/// Emit a `key: <value>\n` pair at `indent` spaces.
186///
187/// `path` is the path to *this* field (parent path + this key). It's used as
188/// the *container* path when recursing into the value: nested comments
189/// captured at this path are interleaved between the value's children.
190///
191/// - Empty objects are **omitted** (caller skips them).
192/// - Empty arrays emit `key: []\n`.
193/// - All other values follow the block-style rules.
194/// - When `fill` is `true`, the emitted form is `key: !fill <value>` for
195///   scalars, `key: !fill\n  - …` for non-empty sequences,
196///   `key: !fill []` for empty sequences, and `key: !fill` for null.
197///   Mappings are rejected at parse and never reach this path.
198fn emit_field(
199    out: &mut String,
200    key: &str,
201    value: &JsonValue,
202    indent: usize,
203    fill: bool,
204    path: &[CommentPathSegment],
205    nested: &[NestedComment],
206) {
207    if fill {
208        push_indent(out, indent);
209        out.push_str(key);
210        match value {
211            JsonValue::Null => out.push_str(": !fill\n"),
212            JsonValue::Bool(_) | JsonValue::Number(_) | JsonValue::String(_) => {
213                out.push_str(": !fill ");
214                emit_scalar(out, value);
215                out.push('\n');
216            }
217            JsonValue::Array(items) if items.is_empty() => {
218                out.push_str(": !fill []\n");
219            }
220            JsonValue::Array(items) => {
221                out.push_str(": !fill\n");
222                emit_sequence_children(out, items, indent + 2, path, nested);
223            }
224            JsonValue::Object(_) => {
225                // Parser rejects !fill on mappings; recovery path only.
226                out.push_str(": ");
227                emit_scalar(out, value);
228                out.push('\n');
229            }
230        }
231        return;
232    }
233    match value {
234        JsonValue::Object(map) if map.is_empty() => {
235            // Empty object → omit the key entirely.
236            return;
237        }
238        JsonValue::Object(map) => {
239            push_indent(out, indent);
240            out.push_str(key);
241            out.push_str(":\n");
242            emit_mapping_children(out, map, indent + 2, path, nested);
243        }
244        JsonValue::Array(items) if items.is_empty() => {
245            push_indent(out, indent);
246            out.push_str(key);
247            out.push_str(": []\n");
248        }
249        JsonValue::Array(items) => {
250            push_indent(out, indent);
251            out.push_str(key);
252            out.push_str(":\n");
253            emit_sequence_children(out, items, indent + 2, path, nested);
254        }
255        _ => {
256            push_indent(out, indent);
257            out.push_str(key);
258            out.push_str(": ");
259            emit_scalar(out, value);
260            out.push('\n');
261        }
262    }
263}
264
265/// Emit the children of a mapping value with comment interleaving.
266///
267/// `child_indent` is the indent at which each child key sits; nested
268/// comments inside this mapping are emitted at the same indent. `path` is
269/// the path to the mapping container (its key in the parent).
270fn emit_mapping_children(
271    out: &mut String,
272    map: &serde_json::Map<String, JsonValue>,
273    child_indent: usize,
274    path: &[CommentPathSegment],
275    nested: &[NestedComment],
276) {
277    for (i, (k, v)) in map.iter().enumerate() {
278        emit_pending_comments(out, path, i, child_indent, nested);
279        let mut child_path = path.to_vec();
280        child_path.push(CommentPathSegment::Key(k.clone()));
281        emit_field(out, k, v, child_indent, false, &child_path, nested);
282    }
283    emit_pending_comments(out, path, map.len(), child_indent, nested);
284}
285
286/// Emit the children of a sequence value with comment interleaving.
287///
288/// `base_indent` is the indent at which each `- ` sits; nested comments
289/// inside this sequence are emitted at the same indent.
290fn emit_sequence_children(
291    out: &mut String,
292    items: &[JsonValue],
293    base_indent: usize,
294    path: &[CommentPathSegment],
295    nested: &[NestedComment],
296) {
297    for (i, item) in items.iter().enumerate() {
298        emit_pending_comments(out, path, i, base_indent, nested);
299        let mut child_path = path.to_vec();
300        child_path.push(CommentPathSegment::Index(i));
301        emit_sequence_item(out, item, base_indent, &child_path, nested);
302    }
303    emit_pending_comments(out, path, items.len(), base_indent, nested);
304}
305
306/// Emit a single `- <value>\n` sequence item at `base_indent` spaces.
307///
308/// `path` is the path to *this* item (parent path + item index).
309fn emit_sequence_item(
310    out: &mut String,
311    value: &JsonValue,
312    base_indent: usize,
313    path: &[CommentPathSegment],
314    nested: &[NestedComment],
315) {
316    match value {
317        JsonValue::Object(map) if map.is_empty() => {
318            // Empty nested object in a sequence: emit as `- {}`
319            push_indent(out, base_indent);
320            out.push_str("- {}\n");
321        }
322        JsonValue::Object(map) => {
323            // Block mapping inside a sequence.
324            // First key on same line as `- `, subsequent keys indented by 2.
325            // Comments inside this mapping use this item's path as the
326            // container. There is no slot to emit a "before-first-key"
327            // comment naturally, so we emit them as a leading line above
328            // the `- ` prefix at the same indent.
329            emit_pending_comments(out, path, 0, base_indent, nested);
330            let mut first = true;
331            for (i, (k, v)) in map.iter().enumerate() {
332                if !first {
333                    emit_pending_comments(out, path, i, base_indent + 2, nested);
334                }
335                let mut child_path = path.to_vec();
336                child_path.push(CommentPathSegment::Key(k.clone()));
337                if first {
338                    push_indent(out, base_indent);
339                    out.push_str("- ");
340                    emit_field_inline(out, k, v, base_indent + 2, &child_path, nested);
341                    first = false;
342                } else {
343                    emit_field(out, k, v, base_indent + 2, false, &child_path, nested);
344                }
345            }
346            emit_pending_comments(out, path, map.len(), base_indent + 2, nested);
347        }
348        JsonValue::Array(inner) if inner.is_empty() => {
349            push_indent(out, base_indent);
350            out.push_str("- []\n");
351        }
352        JsonValue::Array(inner) => {
353            // Nested sequence: emit `- ` for first item, then recurse.
354            push_indent(out, base_indent);
355            out.push_str("-\n");
356            emit_sequence_children(out, inner, base_indent + 2, path, nested);
357        }
358        _ => {
359            push_indent(out, base_indent);
360            out.push_str("- ");
361            emit_scalar(out, value);
362            out.push('\n');
363        }
364    }
365}
366
367/// Emit a `key: <value>\n` pair where the key is already on a `- ` line.
368/// The key/value go on the same line as the `- ` prefix (caller already wrote it).
369fn emit_field_inline(
370    out: &mut String,
371    key: &str,
372    value: &JsonValue,
373    child_indent: usize,
374    path: &[CommentPathSegment],
375    nested: &[NestedComment],
376) {
377    match value {
378        JsonValue::Object(map) if map.is_empty() => {
379            // key: {}
380            out.push_str(key);
381            out.push_str(": {}\n");
382        }
383        JsonValue::Object(map) => {
384            out.push_str(key);
385            out.push_str(":\n");
386            emit_mapping_children(out, map, child_indent, path, nested);
387        }
388        JsonValue::Array(items) if items.is_empty() => {
389            out.push_str(key);
390            out.push_str(": []\n");
391        }
392        JsonValue::Array(items) => {
393            out.push_str(key);
394            out.push_str(":\n");
395            emit_sequence_children(out, items, child_indent + 2, path, nested);
396        }
397        _ => {
398            out.push_str(key);
399            out.push_str(": ");
400            emit_scalar(out, value);
401            out.push('\n');
402        }
403    }
404}
405
406/// Emit a scalar value (no key, no newline) onto `out`.
407fn emit_scalar(out: &mut String, value: &JsonValue) {
408    match value {
409        JsonValue::Null => out.push_str("null"),
410        JsonValue::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
411        JsonValue::Number(n) => out.push_str(&n.to_string()),
412        JsonValue::String(s) => emit_double_quoted(out, s),
413        // Arrays/objects should not reach here via emit_field — handled above.
414        // As a fallback, emit JSON representation.
415        other => out.push_str(&other.to_string()),
416    }
417}
418
/// Append `s` to `out` as a JSON-style double-quoted YAML scalar.
///
/// The output always starts and ends with `"`.  Characters are escaped as
/// follows; everything else is copied through unchanged:
///
/// - `\` → `\\`
/// - `"` → `\"`
/// - line feed → `\n`
/// - carriage return → `\r`
/// - tab → `\t`
/// - any other control character (U+0000–U+001F, U+007F–U+009F) → `\uXXXX`
///   with four upper-case hex digits
fn emit_double_quoted(out: &mut String, s: &str) {
    // At least the input plus the two quotes will be written.
    out.reserve(s.len() + 2);
    out.push('"');
    for ch in s.chars() {
        match ch {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            // Remaining C0 controls, DEL, and the C1 controls.  All of these
            // are at most U+009F, so four hex digits always suffice.
            '\u{0000}'..='\u{001F}' | '\u{007F}'..='\u{009F}' => {
                out.push_str(&format!("\\u{:04X}", u32::from(ch)));
            }
            other => out.push(other),
        }
    }
    out.push('"');
}
451
452// ── Utilities ─────────────────────────────────────────────────────────────────
453
/// Append `spaces` ASCII space characters to `out`.
fn push_indent(out: &mut String, spaces: usize) {
    out.extend(std::iter::repeat(' ').take(spaces));
}
459
460// ── Unit tests ────────────────────────────────────────────────────────────────
461
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value::QuillValue;

    /// Run `emit_double_quoted` on `input` and return what it wrote.
    fn quoted(input: &str) -> String {
        let mut rendered = String::new();
        emit_double_quoted(&mut rendered, input);
        rendered
    }

    /// Single-segment comment path naming a top-level field.
    fn p(key: &str) -> Vec<CommentPathSegment> {
        vec![CommentPathSegment::Key(key.to_string())]
    }

    /// Emit a top-level field (indent 0, no nested comments) and return the
    /// text that `emit_field` produced for it.
    fn emitted(key: &str, json: serde_json::Value, fill: bool) -> String {
        let value = QuillValue::from_json(json);
        let mut rendered = String::new();
        emit_field(&mut rendered, key, value.as_json(), 0, fill, &p(key), &[]);
        rendered
    }

    #[test]
    fn double_quoted_basic() {
        assert_eq!(quoted("hello"), r#""hello""#);
    }

    #[test]
    fn double_quoted_ambiguous_strings() {
        // Each of these would be read as a non-string by a YAML parser if it
        // were left bare; the surrounding double quotes keep it a string.
        const AMBIGUOUS: [&str; 10] = [
            "on", "off", "yes", "no", "true", "false", "null", "~", "01234", "1e10",
        ];
        for candidate in AMBIGUOUS.iter() {
            let rendered = quoted(candidate);
            assert!(
                rendered.starts_with('"') && rendered.ends_with('"'),
                "should be double-quoted: {}",
                rendered
            );
            // None of these needs escaping, so the payload is unchanged.
            assert_eq!(&rendered[1..rendered.len() - 1], *candidate);
        }
    }

    #[test]
    fn double_quoted_escapes() {
        assert_eq!(quoted("a\\b\"c\nd\te"), r#""a\\b\"c\nd\te""#);
    }

    #[test]
    fn double_quoted_control_chars() {
        assert_eq!(quoted("\x01\x1F"), "\"\\u0001\\u001F\"");
    }

    #[test]
    fn empty_object_omitted() {
        // An empty mapping produces no output at all.
        assert_eq!(emitted("empty_map", serde_json::json!({}), false), "");
    }

    #[test]
    fn empty_array_emitted() {
        assert_eq!(
            emitted("empty_seq", serde_json::json!([]), false),
            "empty_seq: []\n"
        );
    }

    #[test]
    fn fill_null_emits_bare_tag() {
        assert_eq!(
            emitted("recipient", serde_json::Value::Null, true),
            "recipient: !fill\n"
        );
    }

    #[test]
    fn fill_string_emits_tag_with_value() {
        assert_eq!(
            emitted("dept", serde_json::json!("placeholder"), true),
            "dept: !fill \"placeholder\"\n"
        );
    }

    #[test]
    fn fill_integer_emits_tag_with_value() {
        assert_eq!(
            emitted("count", serde_json::json!(42), true),
            "count: !fill 42\n"
        );
    }
}