use crate::error::{Error, Result};
use crate::value::{ObjectMap, Value};
/// Serialises `value` into its canonical text form.
///
/// Only an object or an array is accepted at the top level:
/// * an empty object produces an empty string,
/// * a non-empty object is written as a sequence of pairs at indent level 0,
/// * an empty array is written as `[]` followed by a newline,
/// * a non-empty array is delegated to `emit_array_root`.
///
/// # Errors
///
/// Returns `Error::Message` when `value` is a scalar, and propagates any
/// error raised while emitting nested values.
pub fn emit_canonical(value: &Value) -> Result<String> {
    // Pre-size the buffer from a rough estimate to limit reallocation.
    let mut buf = String::with_capacity(estimate_size(value));
    match value {
        Value::Object(members) => {
            if !members.is_empty() {
                emit_object_pairs(members, 0, &mut buf)?;
            }
        }
        Value::Array(elements) => {
            if elements.is_empty() {
                buf.push_str("[]\n");
            } else {
                emit_array_root(elements, &mut buf)?;
            }
        }
        Value::Null
        | Value::Bool(_)
        | Value::Integer(_)
        | Value::Float(_)
        | Value::String(_) => {
            return Err(Error::Message(
                "top-level value must be an Object or an Array".into(),
            ));
        }
    }
    Ok(buf)
}
/// Emits every member of `obj`, in the map's iteration order, as a pair at
/// nesting level `indent`. Stops at and returns the first error.
fn emit_object_pairs(obj: &ObjectMap, indent: usize, out: &mut String) -> Result<()> {
    obj.iter()
        .try_for_each(|(name, member)| emit_pair(name, member, indent, out))
}
fn emit_array_root(items: &[Value], out: &mut String) -> Result<()> {
let needs_wrap = !items.is_empty() && first_item_needs_wrap(&items[0]);
if needs_wrap {
out.push_str("[\n");
for item in items {
emit_array_item(item, 1, out)?;
}
out.push_str("]\n");
} else {
for item in items {
emit_array_item(item, 0, out)?;
}
}
Ok(())
}
/// Reports whether `item` is a non-empty object or a non-empty array.
/// Scalars and empty containers yield `false`.
fn first_item_needs_wrap(item: &Value) -> bool {
    match item {
        Value::Object(members) => !members.is_empty(),
        Value::Array(elements) => !elements.is_empty(),
        _ => false,
    }
}
/// Writes one object member (`key` plus `value`) at nesting level `indent`.
///
/// The line starts with the indentation and the key as rendered by
/// `push_escaped_key_segment`. Scalars follow on the same line after `: `.
/// Non-empty arrays and objects open a bracket or brace, list their children
/// one level deeper, and close it on a line indented like the key. String
/// values are handed to `emit_string_in_pair`, which writes its own separator.
fn emit_pair(key: &str, value: &Value, indent: usize, out: &mut String) -> Result<()> {
    push_indent(out, indent);
    crate::render::helpers::push_escaped_key_segment(key, out);
    match value {
        // Strings pick between several separators, so they are handled apart.
        Value::String(text) => return emit_string_in_pair(text, indent, out),
        Value::Null => out.push_str(": null\n"),
        Value::Bool(true) => out.push_str(": true\n"),
        Value::Bool(false) => out.push_str(": false\n"),
        Value::Integer(digits) => {
            out.push_str(": ");
            out.push_str(digits);
            out.push('\n');
        }
        Value::Float(text) => {
            let rendered = canonical_float(text);
            out.push_str(": ");
            out.push_str(&rendered);
            out.push('\n');
        }
        Value::Array(elements) if elements.is_empty() => out.push_str(": []\n"),
        Value::Array(elements) => {
            out.push_str(": [\n");
            for element in elements {
                emit_array_item(element, indent + 1, out)?;
            }
            push_indent(out, indent);
            out.push_str("]\n");
        }
        Value::Object(members) if members.is_empty() => out.push_str(": {}\n"),
        Value::Object(members) => {
            out.push_str(": {\n");
            emit_object_pairs(members, indent + 1, out)?;
            push_indent(out, indent);
            out.push_str("}\n");
        }
    }
    Ok(())
}
/// Writes one array element at nesting level `indent`.
///
/// Scalars occupy a single indented line. Non-empty arrays and objects open a
/// bracket or brace, list their children one level deeper, and close it on a
/// line indented like the opener. String elements are handed to
/// `emit_string_as_item`.
fn emit_array_item(value: &Value, indent: usize, out: &mut String) -> Result<()> {
    push_indent(out, indent);
    match value {
        Value::String(text) => return emit_string_as_item(text, indent, out),
        Value::Null => out.push_str("null\n"),
        Value::Bool(true) => out.push_str("true\n"),
        Value::Bool(false) => out.push_str("false\n"),
        Value::Integer(digits) => {
            out.push_str(digits);
            out.push('\n');
        }
        Value::Float(text) => {
            let rendered = canonical_float(text);
            out.push_str(&rendered);
            out.push('\n');
        }
        Value::Array(elements) if elements.is_empty() => out.push_str("[]\n"),
        Value::Array(elements) => {
            out.push_str("[\n");
            for element in elements {
                emit_array_item(element, indent + 1, out)?;
            }
            push_indent(out, indent);
            out.push_str("]\n");
        }
        Value::Object(members) if members.is_empty() => out.push_str("{}\n"),
        Value::Object(members) => {
            out.push_str("{\n");
            emit_object_pairs(members, indent + 1, out)?;
            push_indent(out, indent);
            out.push_str("}\n");
        }
    }
    Ok(())
}
/// Writes the separator and body for a string that is the value of a pair;
/// the key has already been written by the caller.
///
/// * empty string: `:` and a newline, with no body;
/// * string containing a line feed: delegated to `emit_multiline_string`;
/// * string for which `needs_raw_marker` is true: `:: ` followed by the text;
/// * anything else: `: ` followed by the text.
///
/// # Errors
///
/// Returns `Error::Message` when the string contains a carriage return.
fn emit_string_in_pair(s: &str, indent: usize, out: &mut String) -> Result<()> {
    if s.is_empty() {
        out.push_str(":\n");
        return Ok(());
    }
    // The CR check comes before the newline check so CR is rejected in
    // multi-line strings as well.
    if s.contains('\r') {
        return Err(Error::Message(
            "String containing CR (0x0D) is not representable in canonical form (§ 5.9.7)".into(),
        ));
    }
    if s.contains('\n') {
        return emit_multiline_string(s, indent, true, out);
    }
    let separator = if needs_raw_marker(s) { ":: " } else { ": " };
    out.push_str(separator);
    out.push_str(s);
    out.push('\n');
    Ok(())
}
/// Writes a string that is an array element; the indentation has already
/// been written by the caller.
///
/// * empty string: `::` and a newline;
/// * string containing a line feed: delegated to `emit_multiline_string`;
/// * string for which `needs_raw_marker` is true: `:: ` followed by the text;
/// * anything else: the text on its own.
///
/// # Errors
///
/// Returns `Error::Message` when the string contains a carriage return.
fn emit_string_as_item(s: &str, indent: usize, out: &mut String) -> Result<()> {
    if s.is_empty() {
        out.push_str("::\n");
        return Ok(());
    }
    // The CR check comes before the newline check so CR is rejected in
    // multi-line strings as well.
    if s.contains('\r') {
        return Err(Error::Message(
            "String containing CR (0x0D) is not representable in canonical form (§ 5.9.7)".into(),
        ));
    }
    if s.contains('\n') {
        return emit_multiline_string(s, indent, false, out);
    }
    if needs_raw_marker(s) {
        out.push_str(":: ");
    }
    out.push_str(s);
    out.push('\n');
    Ok(())
}
/// Emits a string containing line feeds as a parenthesised block.
///
/// The text is split on `\n`. The `(` … `)` form (`emit_multiline_stripped`)
/// is used only when some line, after trimming, is exactly `))` and no line,
/// after trimming, is exactly `)`. Every other case uses the `((` … `))` form
/// (`emit_multiline_verbatim`).
///
/// NOTE(review): presumably a lone `))` line would be mistaken for the
/// closing delimiter of the `((` form — confirm against the format spec.
///
/// This function always returns `Ok(())`.
fn emit_multiline_string(s: &str, indent: usize, is_pair: bool, out: &mut String) -> Result<()> {
    let lines: Vec<&str> = s.split('\n').collect();
    let mut lone_double_close = false;
    let mut lone_single_close = false;
    for line in &lines {
        match line.trim() {
            "))" => lone_double_close = true,
            ")" => lone_single_close = true,
            _ => {}
        }
    }
    if lone_double_close && !lone_single_close {
        emit_multiline_stripped(&lines, indent, is_pair, out);
    } else {
        emit_multiline_verbatim(&lines, indent, is_pair, out);
    }
    Ok(())
}
/// Writes `segments` as a `((` … `))` block.
///
/// The opener is prefixed with `: ` when the block is the value of a pair.
/// Each segment is copied unchanged onto its own line with no added
/// indentation; only the closing `))` line is indented to `indent`.
fn emit_multiline_verbatim(segments: &[&str], indent: usize, is_pair: bool, out: &mut String) {
    out.push_str(if is_pair { ": ((\n" } else { "((\n" });
    let mut lines = segments.iter();
    if let Some(first) = lines.next() {
        out.push_str(first);
        for line in lines {
            out.push('\n');
            out.push_str(line);
        }
    }
    out.push('\n');
    push_indent(out, indent);
    out.push_str("))\n");
}
/// Writes `segments` as a `(` … `)` block.
///
/// The opener is prefixed with `: ` when the block is the value of a pair.
/// Leading whitespace is removed from every segment, so whitespace-only
/// segments become empty lines. Only the closing `)` line is indented to
/// `indent`.
fn emit_multiline_stripped(segments: &[&str], indent: usize, is_pair: bool, out: &mut String) {
    out.push_str(if is_pair { ": (\n" } else { "(\n" });
    let mut lines = segments.iter();
    if let Some(first) = lines.next() {
        // `trim_start` of a whitespace-only segment is the empty string, so
        // blank segments need no separate branch.
        out.push_str(first.trim_start());
        for line in lines {
            out.push('\n');
            out.push_str(line.trim_start());
        }
    }
    out.push('\n');
    push_indent(out, indent);
    out.push_str(")\n");
}
/// Returns the canonical spelling of the float literal `s`.
///
/// The text is returned unchanged when:
/// * it does not parse as an `f64`,
/// * it parses to a non-finite value (infinity or NaN, e.g. an overflowing
///   literal such as `1e999`),
/// * it parses to zero, or
/// * its magnitude lies in `[1e-2, 1e7)`.
///
/// Every other value is re-rendered in scientific notation and passed
/// through `normalise_scientific`.
fn canonical_float(s: &str) -> String {
    let val: f64 = match s.parse() {
        Ok(v) => v,
        Err(_) => return s.to_string(),
    };
    // `{:e}` renders non-finite values as `inf` / `NaN`, which have no
    // exponent part to normalise and would discard the original token, so
    // they are passed through untouched, as is zero.
    if !val.is_finite() || val == 0.0 {
        return s.to_string();
    }
    if (1e-2..1e7).contains(&val.abs()) {
        s.to_string()
    } else {
        normalise_scientific(&format!("{:e}", val))
    }
}
/// Tidies a scientific-notation rendering such as `1.500e+3` into `1.5e3`.
///
/// The text is split at the first `e`. If the mantissa contains a `.`,
/// trailing zeros and then a trailing `.` are removed. Leading `+` signs are
/// removed from the exponent.
///
/// Input without an `e` (for example `inf` or `NaN`) is returned unchanged
/// rather than being sliced past its end.
fn normalise_scientific(raw: &str) -> String {
    let (mantissa, exponent) = match raw.split_once('e') {
        Some(parts) => parts,
        // No exponent marker: there is nothing to normalise.
        None => return raw.to_string(),
    };
    let mantissa = if mantissa.contains('.') {
        mantissa.trim_end_matches('0').trim_end_matches('.')
    } else {
        // An integer mantissa keeps its zeros: "10" must not become "1".
        mantissa
    };
    let exponent = exponent.trim_start_matches('+');
    format!("{}e{}", mantissa, exponent)
}
/// Decides whether a single-line string body must be written with the raw
/// marker (`::`).
///
/// Returns `true` when the body:
/// * starts or ends with a space or tab,
/// * starts with `{` or `[`,
/// * is exactly `(`, `((`, `()` or `(())`,
/// * is exactly `null`, `true` or `false`, or
/// * matches the integer or float grammar of `crate::parser::classify`.
///
/// An empty body yields `false`.
fn needs_raw_marker(body: &str) -> bool {
    let (first, last) = match (body.bytes().next(), body.bytes().next_back()) {
        (Some(first), Some(last)) => (first, last),
        _ => return false,
    };
    let is_blank = |byte: u8| byte == b' ' || byte == b'\t';
    if is_blank(first) || is_blank(last) {
        return true;
    }
    if matches!(first, b'{' | b'[') {
        return true;
    }
    if matches!(
        body,
        "(" | "((" | "()" | "(())" | "null" | "true" | "false"
    ) {
        return true;
    }
    crate::parser::classify::matches_integer_grammar(body)
        || crate::parser::classify::matches_float_grammar(body)
}
/// Text that makes up one level of indentation.
const INDENT: &str = " ";

/// Appends `level` levels of indentation to `out`, i.e.
/// `level * INDENT.len()` space characters. Level 0 appends nothing.
fn push_indent(out: &mut String, level: usize) {
    let width = level * INDENT.len();
    if width == 0 {
        return;
    }
    out.reserve(width);
    out.extend(std::iter::repeat(' ').take(width));
}
/// Gives a rough size for the text of `value`, used to pre-size the output
/// buffer. The figure is a heuristic, not an exact length.
///
/// Scalars contribute a small constant, or their text length plus 8.
/// Containers contribute the sum of their children plus 4, and each object
/// member additionally contributes its key length plus 4.
fn estimate_size(value: &Value) -> usize {
    match value {
        Value::Null => 5,
        Value::Bool(_) => 6,
        Value::Integer(text) | Value::Float(text) | Value::String(text) => text.len() + 8,
        Value::Array(elements) => {
            let children: usize = elements.iter().map(estimate_size).sum();
            4 + children
        }
        Value::Object(members) => {
            let children: usize = members
                .iter()
                .map(|(name, member)| name.len() + 4 + estimate_size(member))
                .sum();
            children.saturating_add(4)
        }
    }
}
// Unit tests for the canonical emitter. Each test builds a `Value` tree with
// the small constructors below and checks the text that `emit_canonical`
// (or `needs_raw_marker`) produces.
#[cfg(test)]
mod tests {
use super::*;
use crate::value::ObjectMap;
use compact_str::CompactString;
use indexmap::IndexMap;
use rustc_hash::FxBuildHasher;
// Builds a `Value::Object` whose members are inserted in the order given.
fn obj(pairs: Vec<(&str, Value)>) -> Value {
let mut map: ObjectMap = IndexMap::with_capacity_and_hasher(pairs.len(), FxBuildHasher);
for (k, v) in pairs {
map.insert(CompactString::new(k), v);
}
Value::Object(map)
}
// Builds a `Value::Array` from the given elements.
fn arr(items: Vec<Value>) -> Value {
Value::Array(items)
}
// Builds a `Value::Integer` holding the text `itoa` formats for `n`.
fn int(n: i64) -> Value {
let mut buf = itoa::Buffer::new();
Value::Integer(CompactString::new(buf.format(n)))
}
// Builds a `Value::Float` holding the text `ryu` formats for `f`.
fn float(f: f64) -> Value {
let mut buf = ryu::Buffer::new();
Value::Float(CompactString::new(buf.format(f)))
}
// Builds a `Value::String` from `text`.
fn s(text: &str) -> Value {
Value::String(CompactString::new(text))
}
// An empty root object emits nothing at all.
#[test]
fn empty_object_root_produces_zero_bytes() {
let v = obj(vec![]);
assert_eq!(emit_canonical(&v).unwrap(), "");
}
// An empty root array is written as `[]` on its own line.
#[test]
fn empty_array_root_produces_brackets() {
let v = arr(vec![]);
assert_eq!(emit_canonical(&v).unwrap(), "[]\n");
}
// Scalar members are written one per line, in insertion order.
#[test]
fn simple_pairs() {
let v = obj(vec![
("host", s("localhost")),
("port", int(8080)),
("debug", Value::Bool(true)),
]);
let out = emit_canonical(&v).unwrap();
assert_eq!(out, "host: localhost\nport: 8080\ndebug: true\n");
}
// `Value::Null` and `Value::Bool(false)` are written as bare keywords.
#[test]
fn null_and_false_keywords() {
let v = obj(vec![
("maintenance", Value::Null),
("enabled", Value::Bool(false)),
]);
let out = emit_canonical(&v).unwrap();
assert_eq!(out, "maintenance: null\nenabled: false\n");
}
// Floats: an in-range value keeps its text; for the small value either the
// scientific or the plain decimal spelling is accepted.
#[test]
fn float_values() {
let v = obj(vec![("ratio", float(0.5)), ("sci", float(1.5e-3))]);
let out = emit_canonical(&v).unwrap();
assert!(out.contains("ratio: 0.5\n"), "got: {out}");
assert!(
out.contains("sci: 1.5e-3\n") || out.contains("sci: 0.0015\n"),
"got: {out}"
);
}
// An empty string value leaves nothing after the colon.
#[test]
fn empty_string_pair() {
let v = obj(vec![("note", s(""))]);
let out = emit_canonical(&v).unwrap();
assert_eq!(out, "note:\n");
}
// Strings that spell a keyword are written with the `::` raw marker.
#[test]
fn raw_marker_for_keywords() {
let v = obj(vec![("a", s("true")), ("b", s("null")), ("c", s("false"))]);
let out = emit_canonical(&v).unwrap();
assert_eq!(out, "a:: true\nb:: null\nc:: false\n");
}
// Strings that look like numbers are written with the `::` raw marker.
#[test]
fn raw_marker_for_numbers() {
let v = obj(vec![("a", s("42")), ("b", s("0.5")), ("c", s("0xFF"))]);
let out = emit_canonical(&v).unwrap();
assert!(out.contains("a:: 42\n"));
assert!(out.contains("b:: 0.5\n"));
assert!(out.contains("c:: 0xFF\n"));
}
// A string starting with `{` is written with the `::` raw marker.
#[test]
fn raw_marker_for_inline_opener() {
let v = obj(vec![("a", s("{hello}"))]);
let out = emit_canonical(&v).unwrap();
assert!(out.contains("a:: {hello}\n"));
}
// A non-empty object value is written as a braced, indented block.
#[test]
fn nested_object() {
let v = obj(vec![(
"server",
obj(vec![("host", s("localhost")), ("port", int(8080))]),
)]);
let out = emit_canonical(&v).unwrap();
let expected = "server: {\n host: localhost\n port: 8080\n}\n";
assert_eq!(out, expected);
}
// A non-empty array value is written as a bracketed, indented block.
#[test]
fn nested_array() {
let v = obj(vec![("tags", arr(vec![s("a"), s("b")]))]);
let out = emit_canonical(&v).unwrap();
let expected = "tags: [\n a\n b\n]\n";
assert_eq!(out, expected);
}
// A root array of plain strings is written without enclosing brackets.
#[test]
fn array_root_bare_items() {
let v = arr(vec![s("foo"), s("bar"), s("baz")]);
let out = emit_canonical(&v).unwrap();
assert_eq!(out, "foo\nbar\nbaz\n");
}
// A root array whose first element is a non-empty array is enclosed in
// brackets, with its elements one level deeper.
#[test]
fn array_root_wraps_when_first_item_is_compound() {
let v = arr(vec![arr(vec![s("a"), s("b")]), arr(vec![s("c"), s("d")])]);
let out = emit_canonical(&v).unwrap();
let expected =
"[\n [\n a\n b\n ]\n [\n c\n d\n ]\n]\n";
assert_eq!(out, expected);
}
// A root array of integers is written without enclosing brackets.
#[test]
fn array_root_does_not_wrap_for_scalars() {
let v = arr(vec![int(1), int(2), int(3)]);
let out = emit_canonical(&v).unwrap();
assert_eq!(out, "1\n2\n3\n");
}
// A carriage return inside a string makes emission fail.
#[test]
fn cr_in_string_is_error() {
let v = obj(vec![("x", s("hello\rworld"))]);
assert!(emit_canonical(&v).is_err());
}
// A string with a line feed is written as a `((` ... `))` block after the key.
#[test]
fn verbatim_multiline_string() {
let v = obj(vec![("msg", s("line one\nline two"))]);
let out = emit_canonical(&v).unwrap();
assert_eq!(
out,
"msg: ((\n\
line one\n\
line two\n\
))\n"
);
}
// The same block form is used for a multi-line string that is an array element.
#[test]
fn verbatim_multiline_in_array_item() {
let v = arr(vec![s("line one\nline two"), s("end")]);
let out = emit_canonical(&v).unwrap();
assert_eq!(
out,
"((\n\
line one\n\
line two\n\
))\n\
end\n"
);
}
// An empty string array element is written as a bare `::`.
#[test]
fn empty_string_array_item() {
let v = arr(vec![s(""), s("ok")]);
let out = emit_canonical(&v).unwrap();
assert_eq!(out, "::\nok\n");
}
// Strings equal to one of the parenthesis tokens get the `::` raw marker.
#[test]
fn raw_marker_for_paren_tokens() {
let v = obj(vec![
("a", s("(")),
("b", s("((")),
("c", s("()")),
("d", s("(())")),
]);
let out = emit_canonical(&v).unwrap();
assert!(out.contains("a:: (\n"));
assert!(out.contains("b:: ((\n"));
assert!(out.contains("c:: ()\n"));
assert!(out.contains("d:: (())\n"));
}
// An array mixing scalars, a keyword-like string, an object and an array:
// every element kind is rendered in its own form inside one bracketed block.
#[test]
fn mixed_heterogeneous_array() {
let v = obj(vec![(
"mixed",
arr(vec![
s("plain_string"),
int(42),
Value::Bool(true),
Value::Null,
s("true"), obj(vec![("nested_obj", s("inside"))]),
arr(vec![s("nested_array")]),
]),
)]);
let out = emit_canonical(&v).unwrap();
let expected = "\
mixed: [
plain_string
42
true
null
:: true
{
nested_obj: inside
}
[
nested_array
]
]
";
assert_eq!(out, expected);
}
// Negative integers keep their leading minus sign.
#[test]
fn integer_canonical_negative() {
let v = obj(vec![("x", int(-1)), ("y", int(-42))]);
let out = emit_canonical(&v).unwrap();
assert!(out.contains("x: -1\n"));
assert!(out.contains("y: -42\n"));
}
// Zero is written as a single `0`.
#[test]
fn integer_canonical_zero() {
let v = obj(vec![("z", int(0))]);
let out = emit_canonical(&v).unwrap();
assert!(out.contains("z: 0\n"));
}
// `needs_raw_marker` accepts signed, prefixed and underscore-separated
// integer spellings and rejects ordinary words.
#[test]
fn needs_raw_marker_integer_forms() {
assert!(needs_raw_marker("42"));
assert!(needs_raw_marker("-1"));
assert!(needs_raw_marker("+7"));
assert!(needs_raw_marker("0xFF"));
assert!(needs_raw_marker("0o755"));
assert!(needs_raw_marker("0b1111_0000"));
assert!(needs_raw_marker("1_000_000"));
assert!(!needs_raw_marker("hello"));
assert!(!needs_raw_marker("42abc"));
}
// `needs_raw_marker` accepts decimal and exponent float spellings, and
// rejects a mantissa with a missing integer or fraction part.
#[test]
fn needs_raw_marker_float_forms() {
assert!(needs_raw_marker("0.5"));
assert!(needs_raw_marker("1.5e-3"));
assert!(needs_raw_marker("1e9"));
assert!(!needs_raw_marker("1."));
assert!(!needs_raw_marker(".5"));
}
}