quillmark_core/document/emit.rs
1//! Canonical Markdown emission for [`Document`].
2//!
3//! This module implements [`Document::to_markdown`], which converts a typed
4//! in-memory `Document` back into canonical Quillmark Markdown.
5//!
6//! ## YAML emission strategy
7//!
8//! `serde-saphyr::SerializerOptions::quote_all` was evaluated (spike, 2026-04-21)
9//! and found to emit single-quoted strings for ordinary scalars like `"on"` and
10//! `"01234"` — switching to double quotes only when the string contains a single
11//! quote, backslash, or control character. That behaviour is correct for
12//! round-trip type-fidelity (single-quoted YAML strings are re-parsed as strings),
13//! but the Quillmark spec (§5.2) requires **always double-quoted, JSON-style
14//! escaping**. Because `SerializerOptions` provides no "force double-quote" knob,
15//! the YAML value block is generated by a hand-written emitter in this module.
16//!
//! The hand-written emitter is small (a few hundred lines, doc comments
//! included), covers exactly the `QuillValue` type variants, and gives complete
//! control over quoting style and indentation without pulling in additional
//! abstractions.
20
21use serde_json::Value as JsonValue;
22
23use super::frontmatter::FrontmatterItem;
24use super::prescan::{CommentPathSegment, NestedComment};
25use super::{Card, Document, Sentinel};
26
27// ── Public entry point ────────────────────────────────────────────────────────
28
29impl Document {
30 /// Emit canonical Quillmark Markdown from this document.
31 ///
32 /// # Contract
33 ///
34 /// 1. **Type-fidelity round-trip.** `Document::from_markdown(&doc.to_markdown())`
35 /// returns a `Document` equal to `doc` by value *and* by type variant.
36 /// `QuillValue::String("on")` round-trips as a string, never as a bool.
37 /// `QuillValue::String("01234")` round-trips as a string, never as an
38 /// integer. This guarantee is the whole point of owning emission.
39 ///
40 /// 2. **Emit-idempotent.** `to_markdown` is a pure function of `doc`; two
41 /// calls on the same `doc` return byte-equal strings.
42 ///
43 /// Byte-equality with the *original source* is **not** guaranteed.
44 ///
45 /// # Emission rules (§5.2)
46 ///
47 /// - Line endings: `\n` only. CRLF normalization happens on import.
48 /// - Frontmatter: `---\n`, `QUILL: <ref>` first, remaining fields in
49 /// `IndexMap` insertion order, `---\n`, blank line.
50 /// - Cards: one blank line before each, fence `---\nCARD: <tag>\n<fields>\n---\n<body>`.
51 /// - Body: emitted verbatim after frontmatter (and cards).
52 /// - Mappings and sequences: **block style** at every nesting level.
53 /// - Booleans: `true` / `false`.
54 /// - Null: `null`.
55 /// - Numbers: bare literals (integer or float as stored in `serde_json::Value`).
56 /// - **Strings: always double-quoted**, JSON-style escaping
57 /// (`\"`, `\\`, `\n`, `\t`, `\uXXXX` for control chars). This is the
58 /// load-bearing rule that guarantees type fidelity.
59 /// - Multi-line strings: double-quoted with `\n` escape sequences. No block
60 /// scalars (`|`, `>`) in v1.
61 ///
62 /// # Open decisions (resolved)
63 ///
64 /// - **Nested-map order.** `QuillValue` is backed by `serde_json::Value`
65 /// whose object type (`serde_json::Map`) preserves insertion order when the
66 /// `serde_json/preserve_order` feature is enabled (it is in this workspace).
67 /// Insertion order is therefore preserved for nested maps at emit time.
68 ///
69 /// - **Empty containers.**
70 /// - Empty object (`{}`) → the key is **omitted** from emit entirely.
71 /// - Empty array (`[]`) → emitted as `key: []\n`.
72 ///
73 /// # What is lost
74 ///
75 /// - **YAML comments**: stripped during parsing; not stored in `Document`.
76 /// - **Custom tags** (`!fill`): the tag is dropped; the scalar value is
77 /// preserved. On re-emit the tag does not appear.
78 /// - **Original quoting style**: all strings are re-emitted double-quoted
79 /// regardless of how they were written in the source.
80 pub fn to_markdown(&self) -> String {
81 let mut out = String::new();
82
83 // ── Main card (first fence + global body) ─────────────────────────────
84 emit_card_fence(&mut out, self.main());
85 out.push_str(self.main().body());
86
87 // ── Composable cards ──────────────────────────────────────────────────
88 // `emit_card` normalises the separator before each fence, so edited
89 // bodies (which may lack a trailing blank line) still round-trip.
90 for card in self.cards() {
91 ensure_f2_before_fence(&mut out);
92 emit_card_fence(&mut out, card);
93 if !card.body().is_empty() {
94 out.push_str(card.body());
95 }
96 }
97
98 out
99 }
100}
101
102// ── Card emission ─────────────────────────────────────────────────────────────
103
104/// Emit a card's metadata fence (between `---\n` markers), including the
105/// sentinel line and every frontmatter item.
106fn emit_card_fence(out: &mut String, card: &Card) {
107 out.push_str("---\n");
108
109 // Sentinel line.
110 match card.sentinel() {
111 Sentinel::Main(r) => {
112 out.push_str("QUILL: ");
113 out.push_str(&r.to_string());
114 out.push('\n');
115 }
116 Sentinel::Card(tag) => {
117 out.push_str("CARD: ");
118 out.push_str(tag);
119 out.push('\n');
120 }
121 }
122
123 // Frontmatter items in order.
124 let nested = card.frontmatter().nested_comments();
125 for item in card.frontmatter().items() {
126 match item {
127 FrontmatterItem::Field { key, value, fill } => {
128 let path = vec![CommentPathSegment::Key(key.clone())];
129 emit_field(out, key, value.as_json(), 0, *fill, &path, nested);
130 }
131 FrontmatterItem::Comment { text } => {
132 out.push_str("# ");
133 out.push_str(text);
134 out.push('\n');
135 }
136 }
137 }
138
139 out.push_str("---\n");
140}
141
/// Append the separator that the next metadata fence needs (the F2
/// precondition: a blank line before the fence).
///
/// Under the F2-separator-never-stored invariant a stored body may end with
/// bare content (no newline), with a line terminator (`\n`), or with
/// author-intended blank lines (`\n\n`, `\n\n\n`, …). The rule is the same
/// in all cases: exactly one `\n` is added to form the F2 blank line, and if
/// the text does not already end in `\n` a line terminator is added first so
/// the last content line is terminated.
///
/// An empty `out` is left untouched: it satisfies F2 through the "line 1"
/// clause (MARKDOWN.md §3 F2).
fn ensure_f2_before_fence(out: &mut String) {
    // In UTF-8 the byte 0x0A only ever encodes '\n', so inspecting the last
    // byte is equivalent to `ends_with('\n')`.
    let separator = match out.as_bytes().last() {
        None => return,
        Some(b'\n') => "\n",
        Some(_) => "\n\n",
    };
    out.push_str(separator);
}
163
164// ── YAML value emission ───────────────────────────────────────────────────────
165
166/// Emit comments captured at `path` whose `position` matches `position`,
167/// each as a `# text` line indented by `indent` spaces.
168fn emit_pending_comments(
169 out: &mut String,
170 path: &[CommentPathSegment],
171 position: usize,
172 indent: usize,
173 nested: &[NestedComment],
174) {
175 for c in nested {
176 if c.position == position && c.container_path.as_slice() == path {
177 push_indent(out, indent);
178 out.push_str("# ");
179 out.push_str(&c.text);
180 out.push('\n');
181 }
182 }
183}
184
185/// Emit a `key: <value>\n` pair at `indent` spaces.
186///
187/// `path` is the path to *this* field (parent path + this key). It's used as
188/// the *container* path when recursing into the value: nested comments
189/// captured at this path are interleaved between the value's children.
190///
191/// - Empty objects are **omitted** (caller skips them).
192/// - Empty arrays emit `key: []\n`.
193/// - All other values follow the block-style rules.
194/// - When `fill` is `true`, the emitted form is `key: !fill <value>` for
195/// scalars, `key: !fill\n - …` for non-empty sequences,
196/// `key: !fill []` for empty sequences, and `key: !fill` for null.
197/// Mappings are rejected at parse and never reach this path.
198fn emit_field(
199 out: &mut String,
200 key: &str,
201 value: &JsonValue,
202 indent: usize,
203 fill: bool,
204 path: &[CommentPathSegment],
205 nested: &[NestedComment],
206) {
207 if fill {
208 push_indent(out, indent);
209 out.push_str(key);
210 match value {
211 JsonValue::Null => out.push_str(": !fill\n"),
212 JsonValue::Bool(_) | JsonValue::Number(_) | JsonValue::String(_) => {
213 out.push_str(": !fill ");
214 emit_scalar(out, value);
215 out.push('\n');
216 }
217 JsonValue::Array(items) if items.is_empty() => {
218 out.push_str(": !fill []\n");
219 }
220 JsonValue::Array(items) => {
221 out.push_str(": !fill\n");
222 emit_sequence_children(out, items, indent + 2, path, nested);
223 }
224 JsonValue::Object(_) => {
225 // Parser rejects !fill on mappings; recovery path only.
226 out.push_str(": ");
227 emit_scalar(out, value);
228 out.push('\n');
229 }
230 }
231 return;
232 }
233 match value {
234 JsonValue::Object(map) if map.is_empty() => {
235 // Empty object → omit the key entirely.
236 return;
237 }
238 JsonValue::Object(map) => {
239 push_indent(out, indent);
240 out.push_str(key);
241 out.push_str(":\n");
242 emit_mapping_children(out, map, indent + 2, path, nested);
243 }
244 JsonValue::Array(items) if items.is_empty() => {
245 push_indent(out, indent);
246 out.push_str(key);
247 out.push_str(": []\n");
248 }
249 JsonValue::Array(items) => {
250 push_indent(out, indent);
251 out.push_str(key);
252 out.push_str(":\n");
253 emit_sequence_children(out, items, indent + 2, path, nested);
254 }
255 _ => {
256 push_indent(out, indent);
257 out.push_str(key);
258 out.push_str(": ");
259 emit_scalar(out, value);
260 out.push('\n');
261 }
262 }
263}
264
265/// Emit the children of a mapping value with comment interleaving.
266///
267/// `child_indent` is the indent at which each child key sits; nested
268/// comments inside this mapping are emitted at the same indent. `path` is
269/// the path to the mapping container (its key in the parent).
270fn emit_mapping_children(
271 out: &mut String,
272 map: &serde_json::Map<String, JsonValue>,
273 child_indent: usize,
274 path: &[CommentPathSegment],
275 nested: &[NestedComment],
276) {
277 for (i, (k, v)) in map.iter().enumerate() {
278 emit_pending_comments(out, path, i, child_indent, nested);
279 let mut child_path = path.to_vec();
280 child_path.push(CommentPathSegment::Key(k.clone()));
281 emit_field(out, k, v, child_indent, false, &child_path, nested);
282 }
283 emit_pending_comments(out, path, map.len(), child_indent, nested);
284}
285
286/// Emit the children of a sequence value with comment interleaving.
287///
288/// `base_indent` is the indent at which each `- ` sits; nested comments
289/// inside this sequence are emitted at the same indent.
290fn emit_sequence_children(
291 out: &mut String,
292 items: &[JsonValue],
293 base_indent: usize,
294 path: &[CommentPathSegment],
295 nested: &[NestedComment],
296) {
297 for (i, item) in items.iter().enumerate() {
298 emit_pending_comments(out, path, i, base_indent, nested);
299 let mut child_path = path.to_vec();
300 child_path.push(CommentPathSegment::Index(i));
301 emit_sequence_item(out, item, base_indent, &child_path, nested);
302 }
303 emit_pending_comments(out, path, items.len(), base_indent, nested);
304}
305
306/// Emit a single `- <value>\n` sequence item at `base_indent` spaces.
307///
308/// `path` is the path to *this* item (parent path + item index).
309fn emit_sequence_item(
310 out: &mut String,
311 value: &JsonValue,
312 base_indent: usize,
313 path: &[CommentPathSegment],
314 nested: &[NestedComment],
315) {
316 match value {
317 JsonValue::Object(map) if map.is_empty() => {
318 // Empty nested object in a sequence: emit as `- {}`
319 push_indent(out, base_indent);
320 out.push_str("- {}\n");
321 }
322 JsonValue::Object(map) => {
323 // Block mapping inside a sequence.
324 // First key on same line as `- `, subsequent keys indented by 2.
325 // Comments inside this mapping use this item's path as the
326 // container. There is no slot to emit a "before-first-key"
327 // comment naturally, so we emit them as a leading line above
328 // the `- ` prefix at the same indent.
329 emit_pending_comments(out, path, 0, base_indent, nested);
330 let mut first = true;
331 for (i, (k, v)) in map.iter().enumerate() {
332 if !first {
333 emit_pending_comments(out, path, i, base_indent + 2, nested);
334 }
335 let mut child_path = path.to_vec();
336 child_path.push(CommentPathSegment::Key(k.clone()));
337 if first {
338 push_indent(out, base_indent);
339 out.push_str("- ");
340 emit_field_inline(out, k, v, base_indent + 2, &child_path, nested);
341 first = false;
342 } else {
343 emit_field(out, k, v, base_indent + 2, false, &child_path, nested);
344 }
345 }
346 emit_pending_comments(out, path, map.len(), base_indent + 2, nested);
347 }
348 JsonValue::Array(inner) if inner.is_empty() => {
349 push_indent(out, base_indent);
350 out.push_str("- []\n");
351 }
352 JsonValue::Array(inner) => {
353 // Nested sequence: emit `- ` for first item, then recurse.
354 push_indent(out, base_indent);
355 out.push_str("-\n");
356 emit_sequence_children(out, inner, base_indent + 2, path, nested);
357 }
358 _ => {
359 push_indent(out, base_indent);
360 out.push_str("- ");
361 emit_scalar(out, value);
362 out.push('\n');
363 }
364 }
365}
366
367/// Emit a `key: <value>\n` pair where the key is already on a `- ` line.
368/// The key/value go on the same line as the `- ` prefix (caller already wrote it).
369fn emit_field_inline(
370 out: &mut String,
371 key: &str,
372 value: &JsonValue,
373 child_indent: usize,
374 path: &[CommentPathSegment],
375 nested: &[NestedComment],
376) {
377 match value {
378 JsonValue::Object(map) if map.is_empty() => {
379 // key: {}
380 out.push_str(key);
381 out.push_str(": {}\n");
382 }
383 JsonValue::Object(map) => {
384 out.push_str(key);
385 out.push_str(":\n");
386 emit_mapping_children(out, map, child_indent, path, nested);
387 }
388 JsonValue::Array(items) if items.is_empty() => {
389 out.push_str(key);
390 out.push_str(": []\n");
391 }
392 JsonValue::Array(items) => {
393 out.push_str(key);
394 out.push_str(":\n");
395 emit_sequence_children(out, items, child_indent + 2, path, nested);
396 }
397 _ => {
398 out.push_str(key);
399 out.push_str(": ");
400 emit_scalar(out, value);
401 out.push('\n');
402 }
403 }
404}
405
406/// Emit a scalar value (no key, no newline) onto `out`.
407fn emit_scalar(out: &mut String, value: &JsonValue) {
408 match value {
409 JsonValue::Null => out.push_str("null"),
410 JsonValue::Bool(b) => out.push_str(if *b { "true" } else { "false" }),
411 JsonValue::Number(n) => out.push_str(&n.to_string()),
412 JsonValue::String(s) => emit_double_quoted(out, s),
413 // Arrays/objects should not reach here via emit_field — handled above.
414 // As a fallback, emit JSON representation.
415 other => out.push_str(&other.to_string()),
416 }
417}
418
/// Append `s` to `out` as a JSON-style double-quoted YAML scalar.
///
/// The opening and closing `"` are included. Escape rules:
///
/// - `\` → `\\`
/// - `"` → `\"`
/// - line feed → `\n`
/// - carriage return → `\r`
/// - tab → `\t`
/// - every other C0 control (U+0000–U+001F), DEL and the C1 controls
///   (U+007F–U+009F), and the noncharacters U+FFFE / U+FFFF → `\uXXXX`
///   (four uppercase hex digits). These code points lie outside YAML's
///   printable character set, so they must not appear raw in the output.
///
/// All other characters are copied through unchanged.
fn emit_double_quoted(out: &mut String, s: &str) {
    // At least the input plus the two quotes will be written.
    out.reserve(s.len() + 2);
    out.push('"');
    for ch in s.chars() {
        match ch {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            // Every code point matched here is at most U+FFFF, so four hex
            // digits are always enough.
            '\u{0000}'..='\u{001F}' | '\u{007F}'..='\u{009F}' | '\u{FFFE}' | '\u{FFFF}' => {
                out.push_str(&format!("\\u{:04X}", u32::from(ch)));
            }
            other => out.push(other),
        }
    }
    out.push('"');
}
451
452// ── Utilities ─────────────────────────────────────────────────────────────────
453
/// Append `spaces` ASCII space characters to `out`.
fn push_indent(out: &mut String, spaces: usize) {
    out.extend(std::iter::repeat(' ').take(spaces));
}
459
460// ── Unit tests ────────────────────────────────────────────────────────────────
461
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value::QuillValue;

    /// Strings that a YAML parser could read as a non-string if left unquoted.
    const AMBIGUOUS: [&str; 10] = [
        "on", "off", "yes", "no", "true", "false", "null", "~", "01234", "1e10",
    ];

    /// Single-segment comment path for a top-level key.
    fn p(key: &str) -> Vec<CommentPathSegment> {
        vec![CommentPathSegment::Key(key.to_string())]
    }

    /// Runs `emit_double_quoted` on `input` and returns what it wrote.
    fn quoted(input: &str) -> String {
        let mut buf = String::new();
        emit_double_quoted(&mut buf, input);
        buf
    }

    /// Emits `key` with `json` as a top-level field (indent 0, no nested
    /// comments) and returns what `emit_field` wrote.
    fn field(key: &str, json: serde_json::Value, fill: bool) -> String {
        let value = QuillValue::from_json(json);
        let mut buf = String::new();
        emit_field(&mut buf, key, value.as_json(), 0, fill, &p(key), &[]);
        buf
    }

    #[test]
    fn double_quoted_basic() {
        assert_eq!(quoted("hello"), r#""hello""#);
    }

    #[test]
    fn double_quoted_ambiguous_strings() {
        // These must remain strings on re-parse — the double-quoting is the guarantee.
        for ambiguous in AMBIGUOUS.iter() {
            let s = quoted(ambiguous);
            assert!(
                s.starts_with('"') && s.ends_with('"'),
                "should be double-quoted: {}",
                s
            );
            // The content itself needs no escaping for any of these.
            assert_eq!(&s[1..s.len() - 1], *ambiguous);
        }
    }

    #[test]
    fn double_quoted_escapes() {
        assert_eq!(quoted("a\\b\"c\nd\te"), r#""a\\b\"c\nd\te""#);
    }

    #[test]
    fn double_quoted_control_chars() {
        assert_eq!(quoted("\x01\x1F"), "\"\\u0001\\u001F\"");
    }

    #[test]
    fn empty_object_omitted() {
        // An empty mapping produces no output at all.
        assert_eq!(field("empty_map", serde_json::json!({}), false), "");
    }

    #[test]
    fn empty_array_emitted() {
        assert_eq!(
            field("empty_seq", serde_json::json!([]), false),
            "empty_seq: []\n"
        );
    }

    #[test]
    fn fill_null_emits_bare_tag() {
        assert_eq!(
            field("recipient", serde_json::Value::Null, true),
            "recipient: !fill\n"
        );
    }

    #[test]
    fn fill_string_emits_tag_with_value() {
        assert_eq!(
            field("dept", serde_json::json!("placeholder"), true),
            "dept: !fill \"placeholder\"\n"
        );
    }

    #[test]
    fn fill_integer_emits_tag_with_value() {
        assert_eq!(field("count", serde_json::json!(42), true), "count: !fill 42\n");
    }
}