// quillmark-core 0.58.2-rc.2
//
// Core types and functionality for Quillmark
// Documentation
//! Canonical Markdown emission for [`Document`].
//!
//! This module implements [`Document::to_markdown`], which converts a typed
//! in-memory `Document` back into canonical Quillmark Markdown.
//!
//! ## YAML emission strategy
//!
//! `serde-saphyr::SerializerOptions::quote_all` was evaluated (spike, 2026-04-21)
//! and found to emit single-quoted strings for ordinary scalars like `"on"` and
//! `"01234"` — switching to double quotes only when the string contains a single
//! quote, backslash, or control character.  That behaviour is correct for
//! round-trip type-fidelity (single-quoted YAML strings are re-parsed as strings),
//! but the Quillmark spec (§5.2) requires **always double-quoted, JSON-style
//! escaping**.  Because `SerializerOptions` provides no "force double-quote" knob,
//! the YAML value block is generated by a hand-written emitter in this module.
//!
//! The hand-written emitter is small (< 120 lines), covers exactly the
//! `QuillValue` type variants, and gives complete control over quoting style and
//! indentation without pulling in additional abstractions.

use serde_json::Value as JsonValue;

use super::{Card, Document};

// ── Public entry point ────────────────────────────────────────────────────────

impl Document {
    /// Serialise this document as canonical Quillmark Markdown.
    ///
    /// # Contract
    ///
    /// 1. **Type-fidelity round-trip.** Feeding the result back through
    ///    `Document::from_markdown` yields a `Document` equal to `self` both by
    ///    value *and* by type variant.  `QuillValue::String("on")` comes back as
    ///    a string, never a bool; `QuillValue::String("01234")` comes back as a
    ///    string, never an integer.  Owning emission exists to provide exactly
    ///    this guarantee.
    ///
    /// 2. **Emit-idempotent.** The output is a pure function of `self`: calling
    ///    `to_markdown` twice on the same document gives byte-equal strings.
    ///
    /// Byte-equality with the *original source text* is **not** promised.
    ///
    /// # Emission rules (§5.2)
    ///
    /// - Line endings: `\n` only.  CRLF normalization happens on import.
    /// - Frontmatter: `---\n`, then `QUILL: <ref>`, then the remaining fields in
    ///   `IndexMap` insertion order, then `---\n`, then a blank line.
    /// - Cards: one blank line before each; fence is
    ///   `---\nCARD: <tag>\n<fields>\n---\n<body>`.
    /// - Body: written verbatim after the frontmatter (and after each card fence).
    /// - Mappings and sequences: **block style** at every nesting level.
    /// - Booleans: `true` / `false`.
    /// - Null: `null`.
    /// - Numbers: bare literals (integer or float, as held by `serde_json::Value`).
    /// - **Strings: always double-quoted** with JSON-style escaping
    ///   (`\"`, `\\`, `\n`, `\t`, `\uXXXX` for control characters).  This is the
    ///   load-bearing rule behind the type-fidelity guarantee.
    /// - Multi-line strings: double-quoted with `\n` escapes.  Block scalars
    ///   (`|`, `>`) are not used in v1.
    ///
    /// # Open decisions (resolved)
    ///
    /// - **Nested-map order.** `QuillValue` wraps `serde_json::Value`, whose
    ///   object type (`serde_json::Map`) keeps insertion order when the
    ///   `serde_json/preserve_order` feature is on (it is in this workspace), so
    ///   nested maps are emitted in insertion order.
    ///
    /// - **Empty containers.**
    ///   - Empty object (`{}`) → the key is **left out** of the output.
    ///   - Empty array (`[]`) → written as `key: []\n`.
    ///
    /// # What is lost
    ///
    /// - **YAML comments**: removed at parse time; `Document` never stores them.
    /// - **Custom tags** (`!fill`): the tag is discarded and only the scalar
    ///   value survives, so the tag is absent on re-emit.
    /// - **Original quoting style**: every string is re-emitted double-quoted,
    ///   whatever the source used.
    pub fn to_markdown(&self) -> String {
        // Opening fence followed directly by the mandatory leading QUILL entry.
        let mut out = String::from("---\nQUILL: ");
        out.push_str(&self.quill_ref.to_string());
        out.push('\n');

        // Every other frontmatter field, in the map's own iteration order.
        for (key, value) in &self.frontmatter {
            emit_field(&mut out, key, value.as_json(), 0);
        }

        // Closing fence of the frontmatter.
        out.push_str("---\n");

        // The global body already carries whatever blank line(s) separate the
        // frontmatter fence from the first card (or EOF), so it is copied
        // through untouched and must precede the cards for a faithful re-parse.
        out.push_str(&self.body);

        // Each card takes care of the blank-line separator in front of its own
        // fence, so bodies edited to lack a trailing blank line still round-trip.
        for card in &self.cards {
            emit_card(&mut out, card);
        }

        out
    }
}

// ── Card emission ─────────────────────────────────────────────────────────────

/// Append one card to `out`: separator, `CARD:` fence with its fields, then
/// the card body.
fn emit_card(out: &mut String, card: &Card) {
    // A metadata fence must be preceded by a blank line (MARKDOWN.md §3 F2).
    // Bodies that came from the parser usually end in `\n\n` already, but a
    // body set programmatically (e.g. `replace_body("x")`, no trailing
    // newline) does not, so the separator is normalised here to keep the
    // output re-parseable.
    ensure_blank_line_before_fence(out);

    // Opening fence and the card's tag line.
    out.push_str("---\nCARD: ");
    out.push_str(card.tag());
    out.push('\n');

    // Card fields are top-level entries, hence indent 0.
    for (name, field) in card.fields() {
        emit_field(out, name, field.as_json(), 0);
    }

    // Closing fence.
    out.push_str("---\n");

    // The body is copied verbatim; appending an empty body adds nothing, so
    // no emptiness check is needed.
    out.push_str(card.body());
}

/// Pads `out` with newlines until it ends in a blank line (`"\n\n"`), which is
/// the F2 precondition for writing the next metadata fence marker.
///
/// An empty buffer is left untouched, as is one that already ends in `"\n\n"`.
fn ensure_blank_line_before_fence(out: &mut String) {
    if out.is_empty() {
        return;
    }

    // How many of the final two bytes are newlines, counted from the end:
    // 0 → no trailing newline, 1 → a single one, 2 → already a blank line.
    let trailing_newlines = out
        .bytes()
        .rev()
        .take(2)
        .take_while(|&byte| byte == b'\n')
        .count();

    // Top the buffer up to exactly two trailing newlines.
    for _ in trailing_newlines..2 {
        out.push('\n');
    }
}

// ── YAML value emission ───────────────────────────────────────────────────────

/// Write one block-style mapping entry for `key` at `indent` spaces.
///
/// - An empty object writes **nothing**: the key is skipped by this function.
/// - An empty array is written as `key: []\n`.
/// - A non-empty object is written as `key:\n` followed by its entries at
///   `indent + 2`.
/// - A non-empty array is written as `key:\n` followed by its items at the
///   same `indent` as the key.
/// - Anything else is written as `key: <scalar>\n`.
///
/// The key is written as-is, without quoting or escaping.
///
/// NOTE(review): a non-empty object whose entries are *all* empty objects
/// produces just `key:\n`, because every child is skipped. YAML readers
/// typically load a bare `key:` as null — confirm whether that is intended.
fn emit_field(out: &mut String, key: &str, value: &JsonValue, indent: usize) {
    // Empty mappings are dropped before anything is written.
    if matches!(value, JsonValue::Object(entries) if entries.is_empty()) {
        return;
    }

    // Every remaining shape starts with the indented key.
    push_indent(out, indent);
    out.push_str(key);

    match value {
        JsonValue::Object(entries) => {
            out.push_str(":\n");
            for (child_key, child_value) in entries {
                emit_field(out, child_key, child_value, indent + 2);
            }
        }
        JsonValue::Array(elements) if elements.is_empty() => out.push_str(": []\n"),
        JsonValue::Array(elements) => {
            out.push_str(":\n");
            // Sequence dashes sit at the same column as the owning key.
            for element in elements {
                emit_sequence_item(out, element, indent);
            }
        }
        scalar => {
            out.push_str(": ");
            emit_scalar(out, scalar);
            out.push('\n');
        }
    }
}

/// Write one block-sequence entry (`- …`) for `value`, with the dash placed at
/// `base_indent` spaces.
///
/// - Empty object → `- {}`; empty array → `- []`.
/// - Non-empty object → the first entry shares the dash line, the remaining
///   entries follow at `base_indent + 2`.
/// - Non-empty array → a bare `-` line, then every inner item at
///   `base_indent + 2`.
/// - Scalar → `- <scalar>`.
fn emit_sequence_item(out: &mut String, value: &JsonValue, base_indent: usize) {
    // Every shape begins with the indentation in front of the dash.
    push_indent(out, base_indent);

    match value {
        JsonValue::Object(entries) => {
            let mut pairs = entries.iter();
            match pairs.next() {
                // No entries at all: flow-style empty mapping.
                None => out.push_str("- {}\n"),
                Some((lead_key, lead_value)) => {
                    // The leading entry rides on the dash line...
                    out.push_str("- ");
                    emit_field_inline(out, lead_key, lead_value, base_indent + 2);
                    // ...and its siblings line up underneath it.
                    for (sibling_key, sibling_value) in pairs {
                        emit_field(out, sibling_key, sibling_value, base_indent + 2);
                    }
                }
            }
        }
        JsonValue::Array(nested) if nested.is_empty() => out.push_str("- []\n"),
        JsonValue::Array(nested) => {
            // The dash stands alone on its line; the inner sequence is written
            // on the following lines, two columns deeper.
            out.push_str("-\n");
            for element in nested {
                emit_sequence_item(out, element, base_indent + 2);
            }
        }
        scalar => {
            out.push_str("- ");
            emit_scalar(out, scalar);
            out.push('\n');
        }
    }
}

/// Emit a `key: <value>\n` pair whose key sits on an already-started `- ` line.
///
/// The caller has written the indentation and the `- ` prefix; this function
/// writes the key and value without any leading indent of its own.
///
/// `child_indent` is the column the key ends up at (the dash column plus 2),
/// which is also the column of the sibling keys that follow in the same
/// mapping.  Content nested under this key is placed relative to it:
///
/// - Empty object → `key: {}` (kept, unlike `emit_field`, because the `- `
///   prefix has already been written and needs an entry after it).
/// - Non-empty object → `key:` then its entries at `child_indent + 2`.  They
///   must sit deeper than the key; at `child_indent` they would line up with
///   the key and be read back as its siblings, leaving the key with no value.
/// - Empty array → `key: []`.
/// - Non-empty array → `key:` then the items with their dashes at
///   `child_indent`, the same placement `emit_field` uses for sequences.
/// - Scalar → `key: <scalar>`.
fn emit_field_inline(out: &mut String, key: &str, value: &JsonValue, child_indent: usize) {
    // All shapes start with the bare key; the prefix is the caller's job.
    out.push_str(key);

    match value {
        JsonValue::Object(map) if map.is_empty() => out.push_str(": {}\n"),
        JsonValue::Object(map) => {
            out.push_str(":\n");
            // Nested entries go one level (2 spaces) deeper than the key,
            // mirroring `emit_field`'s `indent + 2`.
            for (k, v) in map {
                emit_field(out, k, v, child_indent + 2);
            }
        }
        JsonValue::Array(items) if items.is_empty() => out.push_str(": []\n"),
        JsonValue::Array(items) => {
            out.push_str(":\n");
            for item in items {
                emit_sequence_item(out, item, child_indent);
            }
        }
        _ => {
            out.push_str(": ");
            emit_scalar(out, value);
            out.push('\n');
        }
    }
}

/// Append the textual form of a scalar `value` to `out`, with no key and no
/// trailing newline.
///
/// Strings are double-quoted; null, booleans and numbers are written bare.
fn emit_scalar(out: &mut String, value: &JsonValue) {
    match value {
        JsonValue::String(text) => emit_double_quoted(out, text),
        JsonValue::Null => out.push_str("null"),
        JsonValue::Bool(true) => out.push_str("true"),
        JsonValue::Bool(false) => out.push_str("false"),
        JsonValue::Number(number) => out.push_str(&number.to_string()),
        // Arrays and objects are dealt with by the field/sequence emitters and
        // are not expected here; if one does arrive, fall back to its
        // `to_string()` rendering.
        container => out.push_str(&container.to_string()),
    }
}

/// Emit a string as a JSON-style double-quoted YAML scalar.
///
/// The text is wrapped in `"` and escaped as follows (same as JSON string
/// encoding):
/// - backslash → `\\`
/// - double quote → `\"`
/// - line feed → `\n`
/// - carriage return → `\r`
/// - tab → `\t`
/// - any other control character (U+0000–U+001F, U+007F–U+009F) → `\uXXXX`
///   with four uppercase hex digits
///
/// Every other character is copied through unchanged.
fn emit_double_quoted(out: &mut String, s: &str) {
    out.push('"');
    for ch in s.chars() {
        match ch {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            // Remaining C0 controls, DEL and the C1 controls.  All of them are
            // at most U+009F, so four hex digits always suffice.
            '\u{00}'..='\u{1F}' | '\u{7F}'..='\u{9F}' => {
                out.push_str(&format!("\\u{:04X}", u32::from(ch)));
            }
            _ => out.push(ch),
        }
    }
    out.push('"');
}

// ── Utilities ─────────────────────────────────────────────────────────────────

/// Append `spaces` space characters to `out`.
fn push_indent(out: &mut String, spaces: usize) {
    out.extend(std::iter::repeat(' ').take(spaces));
}

// ── Unit tests ────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;
    use crate::value::QuillValue;

    /// Run `emit_double_quoted` over `input` and return what it wrote.
    fn quoted(input: &str) -> String {
        let mut buf = String::new();
        emit_double_quoted(&mut buf, input);
        buf
    }

    /// Emit `json` as a field named `key` at indent 0 and return what was written.
    fn field(key: &str, json: serde_json::Value) -> String {
        let value = QuillValue::from_json(json);
        let mut buf = String::new();
        emit_field(&mut buf, key, value.as_json(), 0);
        buf
    }

    #[test]
    fn double_quoted_basic() {
        assert_eq!(quoted("hello"), r#""hello""#);
    }

    #[test]
    fn double_quoted_ambiguous_strings() {
        // Each of these would be read as a non-string if written bare; the
        // surrounding double quotes are what keeps them strings on re-parse.
        let ambiguous = [
            "on", "off", "yes", "no", "true", "false", "null", "~", "01234", "1e10",
        ];
        for &word in &ambiguous {
            let emitted = quoted(word);
            assert!(
                emitted.starts_with('"') && emitted.ends_with('"'),
                "should be double-quoted: {}",
                emitted
            );
            // None of these need escaping, so the interior is the input itself.
            assert_eq!(&emitted[1..emitted.len() - 1], word);
        }
    }

    #[test]
    fn double_quoted_escapes() {
        assert_eq!(quoted("a\\b\"c\nd\te"), r#""a\\b\"c\nd\te""#);
    }

    #[test]
    fn double_quoted_control_chars() {
        assert_eq!(quoted("\x01\x1F"), "\"\\u0001\\u001F\"");
    }

    #[test]
    fn empty_object_omitted() {
        // An empty mapping writes nothing at all.
        assert_eq!(field("empty_map", serde_json::json!({})), "");
    }

    #[test]
    fn empty_array_emitted() {
        assert_eq!(field("empty_seq", serde_json::json!([])), "empty_seq: []\n");
    }
}