okf 0.1.0-alpha.1

A pure-Rust, zero-dependency implementation of the Open Knowledge Format (OKF) v0.1: parser, model, validator, link graph, and index/log tooling.
Documentation
//! Block-style YAML emitter for the OKF subset.
//!
//! The emitter targets one property: re-parsing its output reproduces the input
//! value (`parse(emit(v)) == v`), with mapping key order preserved. It is not
//! intended to be byte-identical to any other YAML writer.

use super::{Mapping, Value};

const INDENT_STEP: usize = 2;

/// Emits a value as YAML text (always ends with a newline, like PyYAML's
/// `safe_dump`).
pub fn emit(value: &Value) -> String {
    let mut out = String::new();
    match value {
        Value::Mapping(m) if !m.is_empty() => emit_mapping(m, 0, &mut out),
        Value::Sequence(s) if !s.is_empty() => emit_sequence(s, 0, &mut out),
        scalar => {
            out.push_str(&emit_scalar(scalar));
            out.push('\n');
        }
    }
    out
}

fn emit_mapping(map: &Mapping, indent: usize, out: &mut String) {
    let pad = " ".repeat(indent);
    for (k, v) in map.iter() {
        let key = emit_scalar(k);
        match v {
            Value::Mapping(m) if !m.is_empty() => {
                out.push_str(&format!("{pad}{key}:\n"));
                emit_mapping(m, indent + INDENT_STEP, out);
            }
            Value::Sequence(s) if !s.is_empty() => {
                out.push_str(&format!("{pad}{key}:\n"));
                emit_sequence(s, indent + INDENT_STEP, out);
            }
            _ => out.push_str(&format!("{pad}{key}: {}\n", emit_scalar(v))),
        }
    }
}

fn emit_sequence(seq: &[Value], indent: usize, out: &mut String) {
    let pad = " ".repeat(indent);
    for item in seq {
        match item {
            Value::Mapping(m) if !m.is_empty() => {
                out.push_str(&format!("{pad}-\n"));
                emit_mapping(m, indent + INDENT_STEP, out);
            }
            Value::Sequence(s) if !s.is_empty() => {
                out.push_str(&format!("{pad}-\n"));
                emit_sequence(s, indent + INDENT_STEP, out);
            }
            _ => out.push_str(&format!("{pad}- {}\n", emit_scalar(item))),
        }
    }
}

/// Emits a scalar (or an empty collection) inline.
fn emit_scalar(value: &Value) -> String {
    match value {
        Value::Null => "null".to_string(),
        Value::Bool(true) => "true".to_string(),
        Value::Bool(false) => "false".to_string(),
        Value::Int(i) => i.to_string(),
        Value::Float(f) => format_float(*f),
        Value::String(s) => emit_string(s),
        Value::Sequence(s) if s.is_empty() => "[]".to_string(),
        Value::Mapping(m) if m.is_empty() => "{}".to_string(),
        // Non-empty collections never reach here in block context.
        Value::Sequence(_) | Value::Mapping(_) => "[]".to_string(),
    }
}

fn format_float(f: f64) -> String {
    if f.is_nan() {
        return ".nan".to_string();
    }
    if f.is_infinite() {
        return if f > 0.0 { ".inf".to_string() } else { "-.inf".to_string() };
    }
    // `{:?}` is the shortest round-tripping representation, but it can omit the
    // decimal point for exponential forms (`1e30`). Ensure a `.` is present so
    // the value re-parses as a float rather than a string.
    let s = format!("{f:?}");
    if s.contains('.') {
        s
    } else if let Some(e) = s.find(['e', 'E']) {
        format!("{}.0{}", &s[..e], &s[e..])
    } else {
        format!("{s}.0")
    }
}

fn emit_string(s: &str) -> String {
    if is_safe_plain(s) {
        s.to_string()
    } else {
        double_quote(s)
    }
}

/// Whether a string can be emitted as a plain (unquoted) scalar without being
/// misread on re-parse.
fn is_safe_plain(s: &str) -> bool {
    if s.is_empty() {
        return false;
    }
    // Must not be reinterpreted as null/bool/number.
    if super::Value::parse(s).map(|v| v != Value::String(s.to_string())).unwrap_or(true) {
        // parse() of a multiline/odd string may error; fall through to quoting.
        return false;
    }
    if s.starts_with(' ') || s.ends_with(' ') {
        return false;
    }
    let first = s.chars().next().unwrap();
    const INDICATORS: &[char] = &[
        '-', '?', ':', ',', '[', ']', '{', '}', '#', '&', '*', '!', '|', '>', '\'', '"', '%', '@',
        '`', ' ',
    ];
    if INDICATORS.contains(&first) {
        return false;
    }
    let bytes: Vec<char> = s.chars().collect();
    for (i, &c) in bytes.iter().enumerate() {
        match c {
            '\n' | '\t' | '\r' => return false,
            ':' if bytes.get(i + 1).map(|n| *n == ' ').unwrap_or(true) => return false,
            '#' if i > 0 && bytes[i - 1] == ' ' => return false,
            _ => {}
        }
    }
    true
}

fn double_quote(s: &str) -> String {
    let mut out = String::with_capacity(s.len() + 2);
    out.push('"');
    for c in s.chars() {
        match c {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\t' => out.push_str("\\t"),
            '\r' => out.push_str("\\r"),
            '\u{0008}' => out.push_str("\\b"),
            '\u{000C}' => out.push_str("\\f"),
            '\0' => out.push_str("\\0"),
            c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)),
            c => out.push(c),
        }
    }
    out.push('"');
    out
}