fig 1.0.0

Parse, edit, and convert config files while preserving comments. Supports JSON, YAML, TOML, and more.
//! The native "fig" printer: a total, 1:1 text encoding of the AST.
//!
//! Unlike the format printers (JSON/YAML/TOML/ZON), this one is a *bijection*
//! with the AST — every `Node.Kind` arm and the YAML reference layer (anchors,
//! tags, aliases) has an unambiguous surface form, so any AST round-trips
//! through it unchanged. It is the default/debug representation and the
//! comparison oracle: canonicalize two documents to native text and `strcmp`.
//!
//! Grammar (informal):
//!   node      ::= prefix* value
//!   prefix    ::= '&' name  |  '!' tagtext        (anchor / tag, space-separated)
//!   value     ::= 'null' | 'true' | 'false'
//!               | string                          ("…", JSON escapes)
//!               | number                          (raw lexeme, see `number`)
//!               | '@' extkind ' ' string          (extended scalar)
//!               | '[' (node (',' node)*)? ']'     (sequence)
//!               | '{' (node ':' node (',' …)*)? '}'  (mapping; keys are nodes)
//!               | '*' name                        (alias)
//!
//! A `number`'s `kind` (integer vs float) is normally implied by its lexeme. On
//! the rare node whose stored kind disagrees with the lexeme (constructible via
//! `AST.Builder.addNumberRaw`), a `~i`/`~f` sigil pins it. Common data never
//! triggers it.

const Printer = @This();
const std = @import("std");
const AST = @import("../ast/ast.zig");
const json_string = @import("../util/json_string.zig");
const Writer = std.Io.Writer;

/// Everything `print`/`printNode` can fail with.
///
/// The native encoding is total over every AST *node kind* — it rejects no
/// variant. The only failures are the underlying writer's and `NestingTooDeep`,
/// a recursion guard: a pathologically nested AST (e.g. a fuzzer feeding the
/// oracle, or a hand-built `Builder` tree) would otherwise overflow the stack.
/// `max_depth` matches the native parser's guard, so any AST the parser accepts
/// prints without hitting it.
pub const Error = Writer.Error || error{NestingTooDeep};

/// Maximum container-nesting depth, shared with `native/parser.zig`. Bounds the
/// recursion in both directions of the bijection. A container that has to open
/// a block at `depth >= max_depth` fails with `error.NestingTooDeep`.
pub const max_depth = 512;

/// Destination of every byte this printer emits.
writer: *Writer,
/// The tree being rendered. Only read, never modified.
ast: *const AST,

/// Spaces per indentation level. The native encoding is a canonical oracle —
/// one document has exactly one spelling — so this is a fixed constant, not a
/// knob: a configurable canonical form would defeat the comparison oracle.
const indent_width = 2;

/// Render the entire document to `writer`: the root's leading comments, the
/// root node itself, the root's trailing comment when the root has not already
/// placed it, and a final newline. The writer is flushed before returning.
///
/// Fails with the writer's errors or `error.NestingTooDeep`.
pub fn print(writer: *Writer, ast: *const AST) Error!void {
    var printer: Printer = .{ .writer = writer, .ast = ast };
    const root = ast.root;

    try printer.leadingComments(ast.leadingCommentAnchor(root), 0);
    try printer.node(root, 0);

    // `isContainer` reports nodes that have already written their own trailing
    // comment beside their opening delimiter; every other root gets it here.
    const trailing_anchor = ast.trailingCommentAnchor(root);
    if (!printer.isContainer(trailing_anchor)) {
        try printer.trailingComment(trailing_anchor);
    }

    try writer.writeByte('\n');
    try writer.flush();
}

/// Render only the subtree rooted at `id`, treating it as nested `depth`
/// levels deep for indentation and for the nesting guard. Writes exactly what
/// `node` writes: no trailing newline is appended and the writer is not
/// flushed (used for partial renders and by `print`).
pub fn printNode(writer: *Writer, ast: *const AST, id: AST.Node.Id, depth: usize) Error!void {
    var printer = Printer{ .writer = writer, .ast = ast };
    return printer.node(id, depth);
}

/// Write one node: its anchor/tag prefixes first, then the surface form of its
/// kind. Containers are delegated to `container`; a `keyvalue` prints its key,
/// the separator `": "`, and its value, all at the same `depth`.
fn node(self: *Printer, id: AST.Node.Id, depth: usize) Error!void {
    try self.prefixes(id);

    const out = self.writer;
    switch (self.ast.nodes[id].kind) {
        .null_ => try out.writeAll("null"),
        .boolean => |flag| {
            const word: []const u8 = if (flag) "true" else "false";
            try out.writeAll(word);
        },
        .number => |num| try self.number(num),
        .string => |text| try json_string.writeQuoted(out, text),
        // Extended scalar: `@<kind> "<text>"`.
        .extended => |ext| {
            try out.writeByte('@');
            try out.writeAll(@tagName(ext.kind));
            try out.writeByte(' ');
            try json_string.writeQuoted(out, ext.text);
        },
        // Alias: `*<name>`, written unquoted.
        .alias => |target| {
            try out.writeByte('*');
            try out.writeAll(target);
        },
        .sequence => |head| try self.container(id, '[', ']', head, depth),
        .mapping => |head| try self.container(id, '{', '}', head, depth),
        .keyvalue => |entry| {
            try self.node(entry.key, depth);
            try out.writeAll(": ");
            try self.node(entry.value, depth);
        },
    }
}

/// Write the reference-layer prefixes recorded for `id`, each followed by one
/// space: the anchor as `&name `, then the tag (e.g. `!!str `). Both live in
/// per-node side-tables; an `id` beyond a table's length simply has no entry,
/// which is the case for every node when the tables are empty.
fn prefixes(self: *Printer, id: AST.Node.Id) Error!void {
    const anchors = self.ast.node_anchors;
    const tags = self.ast.node_tags;

    const anchor = if (id < anchors.len) anchors[id] else null;
    if (anchor) |name| {
        try self.writer.writeByte('&');
        try self.writer.writeAll(name);
        try self.writer.writeByte(' ');
    }

    // The stored tag already carries its leading `!` (e.g. `!!str`), so it is
    // written verbatim with no extra marker.
    const tag = if (id < tags.len) tags[id] else null;
    if (tag) |text| {
        try self.writer.writeAll(text);
        try self.writer.writeByte(' ');
    }
}

/// Write a number as its raw lexeme, unchanged. When the kind stored on the
/// node differs from the kind `impliedNumberKind` derives from that lexeme, a
/// `~f` (float) or `~i` (otherwise) sigil is written first so the stored kind
/// can be restored exactly.
fn number(self: *Printer, value: AST.Node.Kind.Number) Error!void {
    const lexeme_matches_kind = impliedNumberKind(value.raw) == value.kind;
    if (!lexeme_matches_kind) {
        const sigil: []const u8 = if (value.kind == .float) "~f" else "~i";
        try self.writer.writeAll(sigil);
    }
    try self.writer.writeAll(value.raw);
}

/// Derive the number kind a bare lexeme implies, mirroring the JSON parser's
/// `getNumber` so an unsigilled number round-trips to the same `kind`.
///
/// Rules, in order:
///   1. after an optional leading `+`/`-`, a `0x`/`0X` prefix means integer;
///   2. a `.` or an `e`/`E` anywhere in the lexeme means float;
///   3. anything else is an integer.
///
/// Total: never errors. A malformed lexeme still falls into one of the two
/// kinds by the rules above.
pub fn impliedNumberKind(raw: []const u8) @TypeOf(@as(AST.Node.Kind.Number, undefined).kind) {
    const has_sign = raw.len != 0 and (raw[0] == '+' or raw[0] == '-');
    const digits = raw[@intFromBool(has_sign)..];

    // Hex is decided first: hex digits may contain `e`/`E`.
    const is_hex = std.mem.startsWith(u8, digits, "0x") or std.mem.startsWith(u8, digits, "0X");
    if (is_hex) return .integer;

    const float_marker = std.mem.indexOfAny(u8, raw, ".eE");
    return if (float_marker != null) .float else .integer;
}

/// Print a sequence or mapping. The two differ only in delimiters and in how
/// each child renders (a bare node vs. a `key: value`), the latter handled by
/// `node`.
///
/// A container with no children and no dangling comments prints inline as
/// `open close` and writes NO trailing comment. Like a scalar, its trailing
/// comment is left to whoever closes the line, so a separating `,` lands before
/// a `//` comment instead of being swallowed by it (`[], // c`, never
/// `[] // c,`). Any other container opens a block: its own trailing comment
/// rides the line of the opening delimiter, each child gets its own line at
/// `depth + 1`, the dangling comments follow, and the closing delimiter sits at
/// `depth`.
///
/// Returns `error.NestingTooDeep` when a block has to be opened at
/// `depth >= max_depth`; writer failures propagate.
fn container(self: *Printer, node_id: AST.Node.Id, open: u8, close: u8, first_child: ?AST.Node.Id, depth: usize) Error!void {
    const dangling = self.ast.comments(node_id).dangling;
    // Only a truly empty container (no children, no trailing orphan comments)
    // prints inline; otherwise it opens a block so the dangling run has a home.
    // This test must stay in step with `isContainer`.
    if (first_child == null and dangling.len == 0) {
        try self.writer.writeByte(open);
        try self.writer.writeByte(close);
        // No trailing comment here: the caller writes it after any separator
        // (see `isContainer`), keeping a `,` out of a `//` comment.
        return;
    }
    // Guard the recursion below; the inline fast path above never descends.
    if (depth >= max_depth) return error.NestingTooDeep;
    try self.writer.writeByte(open);
    // The container's own trailing comment rides the line it opened on.
    try self.trailingComment(node_id);
    try self.writer.writeByte('\n');
    var current_id = first_child;
    while (current_id) |id| {
        // A mapping child is a `keyvalue`: its leading comment sits above the
        // key, its trailing comment after the value. A sequence child is the
        // value node itself, so both anchors collapse to `id`.
        try self.leadingComments(self.ast.leadingCommentAnchor(id), depth + 1);
        try self.writeIndent(depth + 1);
        try self.node(id, depth + 1);
        current_id = self.ast.nodes[id].next_sibling;
        // The separator goes before the trailing comment so a line comment
        // cannot absorb it.
        if (current_id != null) try self.writer.writeByte(',');
        // A block container already wrote its trailing comment beside its
        // opener; scalars and inline-empty containers get theirs here.
        const anchor = self.ast.trailingCommentAnchor(id);
        if (!self.isContainer(anchor)) try self.trailingComment(anchor);
        try self.writer.writeByte('\n');
    }
    // Comments dangling at the end of the body (after the last child, or the
    // entire body of an otherwise-empty container).
    for (dangling) |c| {
        try self.writeIndent(depth + 1);
        try self.writeComment(c);
        try self.writer.writeByte('\n');
    }
    try self.writeIndent(depth);
    try self.writer.writeByte(close);
}

/// Whether `id` is a container that prints as a block and therefore emits its
/// own trailing comment beside its opening delimiter, not via its parent.
///
/// An empty container with no dangling comments prints inline and answers
/// `false`, so callers write its trailing comment after any separator, exactly
/// as they do for a scalar. Non-container nodes also answer `false`.
fn isContainer(self: *const Printer, id: AST.Node.Id) bool {
    const first_child: ?AST.Node.Id = switch (self.ast.nodes[id].kind) {
        .sequence => |first| first,
        .mapping => |first| first,
        else => return false,
    };
    // Mirror of the inline fast-path test in `container`.
    return first_child != null or self.ast.comments(id).dangling.len != 0;
}

/// Write every leading comment attached to `id`, each on its own line: indent
/// to `depth`, the comment, then a newline. The caller's own indented line for
/// the node follows directly after.
fn leadingComments(self: *Printer, id: AST.Node.Id, depth: usize) Error!void {
    const leading = self.ast.comments(id).leading;
    for (leading) |comment| {
        try self.writeIndent(depth);
        try self.writeComment(comment);
        try self.writer.writeByte('\n');
    }
}

/// Write the trailing comment attached to `id`, preceded by a single space.
/// Writes nothing when the node has none. The line is left open: the caller is
/// responsible for the newline.
fn trailingComment(self: *Printer, id: AST.Node.Id) Error!void {
    const comment = self.ast.comments(id).trailing orelse return;
    try self.writer.writeByte(' ');
    try self.writeComment(comment);
}

/// Write a single comment in native syntax, mirroring the marker set the
/// native parser accepts:
///   - line comment:  `// text`   (just `//` when the text is empty)
///   - block comment: `/* text */` (just `/**/` when the text is empty)
/// The text itself is written verbatim.
fn writeComment(self: *Printer, c: AST.Comment) Error!void {
    const out = self.writer;
    const is_block = c.style == .block;

    try out.writeAll(if (is_block) "/*" else "//");
    if (c.text.len != 0) {
        try out.writeByte(' ');
        try out.writeAll(c.text);
        // Only a block comment pads the text on the closing side as well.
        if (is_block) try out.writeByte(' ');
    }
    if (is_block) try out.writeAll("*/");
}

/// Write the indentation for nesting level `depth`: `indent_width` spaces per
/// level.
fn writeIndent(self: *Printer, depth: usize) Error!void {
    const column_count = indent_width * depth;
    try self.writer.splatByteAll(' ', column_count);
}

// Strings are quoted/escaped by the shared `json_string.writeQuoted` (the native
// parser's escape decoder accepts exactly that set).

// =======
// Testing
// =======

test "prints scalars, sequences, and mappings 1:1" {
    const Parser = @import("parser.zig");
    const gpa = std.testing.allocator;

    // One document touching strings, an integer, a float, a boolean, null,
    // and both container kinds.
    const source =
        \\{ "name": "fig", "port": 8080, "ratio": 1.0, "tags": ["a", true, null] }
    ;
    // Canonical spelling: one child per line, two-space indent, final newline.
    const expected =
        \\{
        \\  "name": "fig",
        \\  "port": 8080,
        \\  "ratio": 1.0,
        \\  "tags": [
        \\    "a",
        \\    true,
        \\    null
        \\  ]
        \\}
        \\
    ;

    var ast = try Parser.parseAbstract(gpa, source);
    defer ast.deinit();

    var sink: Writer.Allocating = .init(gpa);
    defer sink.deinit();
    try print(&sink.writer, &ast);

    try std.testing.expectEqualStrings(expected, sink.written());
}

test "prints empty containers inline" {
    const Parser = @import("parser.zig");
    const gpa = std.testing.allocator;

    // Childless containers stay on their entry's line instead of opening a
    // block.
    const expected =
        \\{
        \\  "a": [],
        \\  "b": {}
        \\}
        \\
    ;

    var ast = try Parser.parseAbstract(gpa, "{ \"a\": [], \"b\": {} }");
    defer ast.deinit();

    var sink: Writer.Allocating = .init(gpa);
    defer sink.deinit();
    try print(&sink.writer, &ast);

    try std.testing.expectEqualStrings(expected, sink.written());
}

test "emits leading and trailing comments" {
    const gpa = std.testing.allocator;
    var builder = AST.Builder.init(gpa);
    defer builder.deinit();

    // Hand-build { "name": "fig" }: a trailing line comment on the value, a
    // leading line comment on the key (printed above the entry), and a leading
    // block comment on the document root.
    const value = try builder.addString("fig");
    try builder.setComments(value, .{ .trailing = .{ .text = "inline", .style = .line } });
    const key = try builder.addString("name");
    try builder.setComments(key, .{ .leading = &.{.{ .text = "greeting", .style = .line }} });
    const root = try builder.addMapping(&.{.{ .key = key, .value = value }});
    try builder.setComments(root, .{ .leading = &.{.{ .text = "doc", .style = .block }} });

    var ast = try builder.finish(root);
    defer ast.deinit();

    const expected =
        \\/* doc */
        \\{
        \\  // greeting
        \\  "name": "fig" // inline
        \\}
        \\
    ;

    var sink: Writer.Allocating = .init(gpa);
    defer sink.deinit();
    try print(&sink.writer, &ast);

    try std.testing.expectEqualStrings(expected, sink.written());
}

test "bounds nesting depth" {
    const gpa = std.testing.allocator;

    // Builds `levels` nested one-element sequences. The innermost sequence
    // holds a single `null` rather than being empty: an empty container takes
    // the inline fast path, which never recurses and so never reaches the
    // depth guard.
    const nested = struct {
        fn build(alloc: std.mem.Allocator, levels: usize) !AST {
            var builder = AST.Builder.init(alloc);
            errdefer builder.deinit();

            const leaf = try builder.addNull();
            var outermost = try builder.addSequence(&.{leaf}); // level 1
            for (1..levels) |_| outermost = try builder.addSequence(&.{outermost});

            const tree = try builder.finish(outermost);
            builder.deinit();
            return tree;
        }
    }.build;

    var sink: Writer.Allocating = .init(gpa);
    defer sink.deinit();

    // Exactly `max_depth` levels still prints.
    var at_limit = try nested(gpa, max_depth);
    defer at_limit.deinit();
    try print(&sink.writer, &at_limit);

    // One level more is rejected with an error instead of crashing.
    var over_limit = try nested(gpa, max_depth + 1);
    defer over_limit.deinit();
    try std.testing.expectError(error.NestingTooDeep, print(&sink.writer, &over_limit));
}