fig 1.0.0

Parse, edit, and convert config files while preserving comments. Supports JSON, YAML, TOML, and more.
//! Materialization: collapse the YAML reference/annotation layer (aliases, merge
//! keys, tags, anchors) into the universal core AST, so a non-YAML printer — or
//! any consumer that wants concrete data — never has to know about it.
//!
//! `materialize` returns a NEW AST allocated in the given (arena) allocator:
//!   - aliases (`*name`)      → a deep copy of the anchor's subtree
//!   - merge keys (`<<`)      → flattened into the host mapping (host keys win;
//!                              earlier merge sources win over later)
//!   - core `!!` tags         → applied to the node's type, then dropped
//!   - unknown/custom tags    → strict: error; lax: dropped (node kept as parsed)
//!   - anchors                → dropped
//!
//! String slices in the result borrow from the source AST (source text or its
//! `owned_strings`); the result must not outlive them. With an arena that holds
//! both, this is automatic.

const std = @import("std");
const Allocator = std.mem.Allocator;
const AST = @import("../ast/ast.zig");

/// Policy for tags outside the YAML core schema: `.strict` rejects them with
/// `error.UnknownTag`; `.lax` drops the tag and keeps the node as parsed. The
/// bare non-specific tag `!` is accepted in both modes.
pub const TagMode = enum { strict, lax };

/// Everything `materialize` can fail with: the three materialization-specific
/// errors below, merged with `AST.ResolveError` (presumably what alias
/// resolution via `resolveDeep` reports) and `Allocator.Error`.
pub const Error = error{
    /// An alias or `<<` merge reached an anchor target that is already being
    /// expanded (e.g. `&a [*a]` or `&m { <<: *m }`).
    AliasCycle,
    /// A non-core tag other than the bare `!` was found in `.strict` mode.
    UnknownTag,
    /// A core tag cannot apply to its node: a collection tag on a scalar, a
    /// scalar or wrong-collection tag on a collection, or `!!bool` on text that
    /// is not a recognized boolean spelling.
    TagTypeMismatch,
} || AST.ResolveError || Allocator.Error;

/// Build a fresh core AST in `arena` from `ast`, with its reference layer
/// (aliases, merge keys, tags, anchors) collapsed away.
///
/// `mode` selects how non-core tags are handled. The returned AST uses `arena`
/// as its allocator and owns no strings of its own; its `node_comments` table
/// is only set when at least one source comment was carried over.
pub fn materialize(arena: Allocator, ast: *const AST, mode: TagMode) Error!AST {
    var builder: Materializer = .{ .src = ast, .arena = arena, .mode = mode };

    // Copy the whole tree first; only then is the output node table complete.
    const new_root = try builder.copy(ast.nodes[ast.root]);
    const new_nodes = try builder.out.toOwnedSlice(arena);

    var flattened: AST = .{
        .allocator = arena,
        .owned_strings = &.{},
        .root = new_root,
        .nodes = new_nodes,
    };

    // `copy` already placed each source node's comments at its new id, so the
    // parallel table is handed over as-is, and skipped when nothing was carried.
    if (builder.any_comments) {
        flattened.node_comments = try builder.out_comments.toOwnedSlice(arena);
    }
    return flattened;
}

/// Single-use worker behind `materialize`: reads `src`, appends the collapsed
/// copy to `out`, and allocates everything from `arena`.
const Materializer = struct {
    /// AST being collapsed; only ever read.
    src: *const AST,
    /// Allocator backing every output node, comment slice, and scratch list.
    arena: Allocator,
    /// Policy for non-core tags (see `customTag`).
    mode: TagMode,
    /// Output node table; each node's id is its index in this list.
    out: std.ArrayList(AST.Node) = .empty,
    /// Parallel to `out`: comments carried from each source node onto its copy.
    /// Default-empty per node; materialized only when `any_comments`.
    out_comments: std.ArrayList(AST.NodeComments) = .empty,
    /// Set once `carry` has copied at least one non-empty comment set.
    any_comments: bool = false,
    /// Anchor target ids currently being expanded, to catch structural cycles
    /// (`&a [*a]`) and self-merges (`&m { <<: *m }`).
    path: std.ArrayList(AST.Node.Id) = .empty,

    /// Append a sibling-less node of `kind` to `out`, together with an empty
    /// slot in `out_comments`, and return the new node's id.
    fn emit(self: *Materializer, kind: AST.Node.Kind) Error!AST.Node.Id {
        const id: AST.Node.Id = @intCast(self.out.items.len);
        try self.out.append(self.arena, .{ .id = id, .kind = kind, .next_sibling = null });
        try self.out_comments.append(self.arena, .{});
        return id;
    }

    /// Copy the comments bound to source node `src_id` onto the freshly emitted
    /// node `new_id`. The `leading` and `dangling` slices are re-duped into the
    /// arena; `trailing` is copied as-is. Does nothing when the source node has
    /// no comments.
    fn carry(self: *Materializer, src_id: AST.Node.Id, new_id: AST.Node.Id) Error!void {
        const c = self.src.comments(src_id);
        if (c.isEmpty()) return;
        self.out_comments.items[new_id] = .{
            .leading = try self.arena.dupe(AST.Comment, c.leading),
            .trailing = c.trailing,
            .dangling = try self.arena.dupe(AST.Comment, c.dangling),
        };
        self.any_comments = true;
    }

    /// Push `id` onto the expansion path, or fail with `error.AliasCycle` if it
    /// is already there. Every successful call must be paired with `leave`.
    fn enter(self: *Materializer, id: AST.Node.Id) Error!void {
        for (self.path.items) |p| if (p == id) return error.AliasCycle;
        try self.path.append(self.arena, id);
    }

    /// Pop the most recently entered id off the expansion path.
    fn leave(self: *Materializer) void {
        _ = self.path.pop();
    }

    /// Deep-copy `node` into the output, expanding aliases/merges and applying
    /// tags. Returns the new node id.
    fn copy(self: *Materializer, node: AST.Node) Error!AST.Node.Id {
        switch (node.kind) {
            .alias => {
                // Copy the alias target in place of the alias; the path guard
                // turns a self-containing anchor into `error.AliasCycle`.
                const target = try self.src.resolveDeep(node);
                try self.enter(target);
                defer self.leave();
                return self.copy(self.src.nodes[target]);
            },
            // `.extended` never appears in a YAML AST, but the switch must be
            // exhaustive; treat it as a plain scalar for completeness.
            .null_, .boolean, .number, .extended, .string => {
                const id = try self.emit(try self.applyScalarTag(node));
                try self.carry(node.id, id);
                return id;
            },
            .sequence => |first| {
                try self.checkCollectionTag(node, .seq);
                // Emit the container before its children so it gets the lower id;
                // `link` fills in the first-child pointer afterwards.
                const id = try self.emit(.{ .sequence = null });
                try self.carry(node.id, id);
                var last: ?AST.Node.Id = null;
                var child = first;
                while (child) |cid| : (child = self.src.nodes[cid].next_sibling) {
                    const new_id = try self.copy(self.src.nodes[cid]);
                    self.link(id, &last, new_id, .sequence);
                }
                return id;
            },
            .mapping => return self.copyMapping(node),
            .keyvalue => unreachable, // keyvalues are only produced inside copyMapping
        }
    }

    /// Append `child_id` to container `container_id`, threading siblings.
    /// `last` tracks the previously linked child (null while the container is
    /// still empty) and is updated to `child_id`.
    fn link(self: *Materializer, container_id: AST.Node.Id, last: *?AST.Node.Id, child_id: AST.Node.Id, comptime kind: enum { sequence, mapping }) void {
        if (last.*) |p| {
            self.out.items[p].next_sibling = child_id;
        } else {
            // First child: record it on the container itself.
            self.out.items[container_id].kind = switch (kind) {
                .sequence => .{ .sequence = child_id },
                .mapping => .{ .mapping = child_id },
            };
        }
        last.* = child_id;
    }

    /// Copy mapping `node`, flattening its `<<` merge (if any) into the result.
    /// Local entries are copied first so they shadow merged keys.
    fn copyMapping(self: *Materializer, node: AST.Node) Error!AST.Node.Id {
        try self.checkCollectionTag(node, .map);
        const id = try self.emit(.{ .mapping = null });
        try self.carry(node.id, id);
        var last: ?AST.Node.Id = null;
        // String keys already present in the output mapping.
        var seen: std.ArrayList([]const u8) = .empty;

        // Local entries first (they win); remember any `<<` merge value.
        var merge_value: ?AST.Node.Id = null;
        var entry = node.kind.mapping;
        while (entry) |eid| : (entry = self.src.nodes[eid].next_sibling) {
            const kv = self.src.nodes[eid].kind.keyvalue;
            if (self.stringKey(kv.key)) |ks| if (std.mem.eql(u8, ks, "<<")) {
                merge_value = kv.value;
                continue;
            };
            try self.appendEntry(id, &last, &seen, kv.key, kv.value);
        }
        if (merge_value) |mv| try self.mergeInto(id, &last, &seen, mv);
        return id;
    }

    /// Copy a key/value pair into mapping `map_id` and record the (string) key.
    /// Non-string keys are copied but not recorded, so they are never shadowed.
    fn appendEntry(self: *Materializer, map_id: AST.Node.Id, last: *?AST.Node.Id, seen: *std.ArrayList([]const u8), key: AST.Node.Id, value: AST.Node.Id) Error!void {
        const key_str = self.stringKey(key);
        const new_key = try self.copy(self.src.nodes[key]);
        const new_val = try self.copy(self.src.nodes[value]);
        const kvid = try self.emit(.{ .keyvalue = .{ .key = new_key, .value = new_val } });
        self.link(map_id, last, kvid, .mapping);
        if (key_str) |ks| try seen.append(self.arena, ks);
    }

    /// Splice the entries of a `<<` merge value into mapping `map_id`, skipping
    /// keys already present (`seen`). Handles a single alias/mapping or a sequence
    /// of them (earlier sources win because they are processed first).
    fn mergeInto(self: *Materializer, map_id: AST.Node.Id, last: *?AST.Node.Id, seen: *std.ArrayList([]const u8), mv: AST.Node.Id) Error!void {
        const mvn = self.src.nodes[mv];
        switch (mvn.kind) {
            .sequence => |first| {
                var e = first;
                while (e) |eid| : (e = self.src.nodes[eid].next_sibling) {
                    try self.mergeOneSource(map_id, last, seen, self.src.nodes[eid]);
                }
            },
            else => try self.mergeOneSource(map_id, last, seen, mvn),
        }
    }

    /// Splice one merge source (an alias or an inline mapping) into `map_id`.
    ///
    /// Precedence mirrors `copyMapping`: the source's own entries are spliced
    /// before any `<<` nested inside it, so the source's keys shadow the keys it
    /// merges in no matter where the `<<` entry sits in the source. Keys already
    /// in `seen` (from the host or an earlier source) are skipped throughout.
    /// A source that does not resolve to a mapping is ignored. Fails with
    /// `error.AliasCycle` when the source is already being expanded.
    fn mergeOneSource(self: *Materializer, map_id: AST.Node.Id, last: *?AST.Node.Id, seen: *std.ArrayList([]const u8), source: AST.Node) Error!void {
        const target_id = try self.src.resolveDeep(source);
        try self.enter(target_id); // guards a self-referential merge
        defer self.leave();
        const target = self.src.nodes[target_id];
        if (target.kind != .mapping) return; // a non-mapping merge source is ignored

        // Pass 1: the source's own entries. They must be recorded in `seen`
        // before its nested merges run, otherwise a `<<` listed ahead of a key
        // would wrongly override that key.
        var entry = target.kind.mapping;
        while (entry) |eid| : (entry = self.src.nodes[eid].next_sibling) {
            const kv = self.src.nodes[eid].kind.keyvalue;
            if (self.stringKey(kv.key)) |ks| {
                if (std.mem.eql(u8, ks, "<<")) continue; // handled in pass 2
                if (contains(seen.items, ks)) continue; // shadowed by a local or earlier source
            }
            try self.appendEntry(map_id, last, seen, kv.key, kv.value);
        }

        // Pass 2: merges nested in the source, in the order they appear.
        entry = target.kind.mapping;
        while (entry) |eid| : (entry = self.src.nodes[eid].next_sibling) {
            const kv = self.src.nodes[eid].kind.keyvalue;
            const ks = self.stringKey(kv.key) orelse continue;
            if (std.mem.eql(u8, ks, "<<")) try self.mergeInto(map_id, last, seen, kv.value);
        }
    }

    /// The text of source node `key` when it is a string node, else null.
    fn stringKey(self: *const Materializer, key: AST.Node.Id) ?[]const u8 {
        return switch (self.src.nodes[key].kind) {
            .string => |s| s,
            else => null,
        };
    }

    // ── tags ──────────────────────────────────────────────────────────────

    /// Compute the kind scalar `node` should have once its tag is applied.
    /// Untagged nodes keep their kind. Core tags: `str` forces a string, `null`
    /// forces null, `bool` goes through `asBool`, `int`/`float` produce a number
    /// whose `raw` is the scalar's text (the text is not validated), and
    /// `seq`/`map` fail with `error.TagTypeMismatch`. Anything else is handled
    /// by `customTag`.
    fn applyScalarTag(self: *const Materializer, node: AST.Node) Error!AST.Node.Kind {
        const tag = self.src.node_tags[node.id] orelse return node.kind;
        if (coreTagName(tag)) |name| {
            if (std.mem.eql(u8, name, "str")) return .{ .string = self.scalarText(node) };
            if (std.mem.eql(u8, name, "null")) return .null_;
            if (std.mem.eql(u8, name, "bool")) return self.asBool(node);
            if (std.mem.eql(u8, name, "int")) return .{ .number = .{ .raw = self.scalarText(node), .kind = .integer } };
            if (std.mem.eql(u8, name, "float")) return .{ .number = .{ .raw = self.scalarText(node), .kind = .float } };
            if (std.mem.eql(u8, name, "seq") or std.mem.eql(u8, name, "map")) return error.TagTypeMismatch;
            // a `!!`-secondary tag with an unrecognized core name is custom.
        }
        return self.customTag(tag, node.kind);
    }

    /// Validate the tag on collection `node`. The matching core tag (`seq` for
    /// sequences, `map` for mappings) and untagged nodes pass; any other
    /// recognized core tag fails with `error.TagTypeMismatch`; everything else
    /// is handled by `customTag`.
    fn checkCollectionTag(self: *const Materializer, node: AST.Node, comptime want: enum { seq, map }) Error!void {
        const tag = self.src.node_tags[node.id] orelse return;
        if (coreTagName(tag)) |name| {
            const ok = if (want == .seq) "seq" else "map";
            if (std.mem.eql(u8, name, ok)) return;
            if (eqlAny(name, &.{ "seq", "map", "str", "int", "float", "bool", "null" }))
                return error.TagTypeMismatch;
        }
        _ = try self.customTag(tag, node.kind); // strict-errors on unknown; lax no-op
    }

    /// A non-core tag: the non-specific `!` is a no-op; anything else is rejected
    /// in strict mode and dropped (node kept as parsed) in lax mode.
    fn customTag(self: *const Materializer, tag: []const u8, kind: AST.Node.Kind) Error!AST.Node.Kind {
        if (std.mem.eql(u8, tag, "!")) return kind;
        if (self.mode == .strict) return error.UnknownTag;
        return kind;
    }

    /// Apply `!!bool`: the scalar's text must be one of `true`/`True`/`TRUE` or
    /// `false`/`False`/`FALSE`; anything else is `error.TagTypeMismatch`.
    fn asBool(self: *const Materializer, node: AST.Node) Error!AST.Node.Kind {
        const t = self.scalarText(node);
        if (eqlAny(t, &.{ "true", "True", "TRUE" })) return .{ .boolean = true };
        if (eqlAny(t, &.{ "false", "False", "FALSE" })) return .{ .boolean = false };
        return error.TagTypeMismatch;
    }

    /// The scalar's text for `!!str`-style forcing. Numbers and strings are exact;
    /// bool/null are reconstructed in canonical form (the original presentation is
    /// not retained on the AST, which is source-free). Any other kind yields "".
    fn scalarText(self: *const Materializer, node: AST.Node) []const u8 {
        _ = self;
        return switch (node.kind) {
            .string => |s| s,
            .number => |n| n.raw,
            .boolean => |b| if (b) "true" else "false",
            .null_ => "null",
            else => "",
        };
    }
};

/// Extract the bare type name from a YAML core-schema tag, or return null when
/// `tag` is not in the core namespace. Three spellings are recognized:
///   - `!!name` shorthand,
///   - verbatim `!<tag:yaml.org,2002:name>`,
///   - the bare URI `tag:yaml.org,2002:name`.
/// The name is returned as a slice of `tag` and is not checked against the set
/// of known core types.
fn coreTagName(tag: []const u8) ?[]const u8 {
    const core_prefix = "tag:yaml.org,2002:";

    // `!!name`: at least one character must follow the two bangs.
    if (tag.len >= 3 and std.mem.startsWith(u8, tag, "!!")) return tag[2..];

    // `!<uri>`: only a URI inside the core namespace counts; any other verbatim
    // tag is not a core tag, so it is not retried as a bare URI below.
    if (tag.len >= 3 and std.mem.startsWith(u8, tag, "!<") and tag[tag.len - 1] == '>') {
        const uri = tag[2 .. tag.len - 1];
        return if (std.mem.startsWith(u8, uri, core_prefix)) uri[core_prefix.len..] else null;
    }

    return if (std.mem.startsWith(u8, tag, core_prefix)) tag[core_prefix.len..] else null;
}

/// Report whether any string in `haystack` is byte-for-byte equal to `needle`.
fn contains(haystack: []const []const u8, needle: []const u8) bool {
    return for (haystack) |candidate| {
        if (std.mem.eql(u8, candidate, needle)) break true;
    } else false;
}

/// Report whether `s` is byte-for-byte equal to at least one of `options`.
fn eqlAny(s: []const u8, options: []const []const u8) bool {
    for (options) |option| {
        if (std.mem.eql(u8, option, s)) return true;
    }
    return false;
}

// ── tests ────────────────────────────────────────────────────────────────
const testing = std.testing;
const Parser = @import("parser.zig");

/// Test helper: true when any node in `ast`'s node table is still an alias.
fn hasAlias(ast: AST) bool {
    for (ast.nodes) |node| {
        switch (node.kind) {
            .alias => return true,
            else => {},
        }
    }
    return false;
}

test "materialize: alias expands to a copied subtree" {
    const gpa = testing.allocator;
    const parsed = try Parser.parse(gpa, "a: &x [1, 2]\nb: *x\n", .v1_2_2);
    defer parsed.deinit(gpa);
    var arena_state = std.heap.ArenaAllocator.init(gpa);
    defer arena_state.deinit();
    const flat = try materialize(arena_state.allocator(), &parsed.ast, .strict);

    // Every alias node has been replaced by a copy of its target.
    try testing.expect(!hasAlias(flat));

    // `b` is now a real sequence whose first element is the number `1`.
    const b_node = try flat.getValByPath(&.{.{ .key = "b" }});
    try testing.expect(b_node.kind == .sequence);
    const b_first = try flat.getValByPath(&.{ .{ .key = "b" }, .{ .index = 0 } });
    try testing.expectEqualSlices(u8, "1", b_first.kind.number.raw);

    // The expansion is its own node, not a shared reference to `a`.
    const a_node = try flat.getValByPath(&.{.{ .key = "a" }});
    try testing.expect(a_node.id != b_node.id);
}

test "materialize: merge flattens with host and earlier-source precedence" {
    // `d` merges `*o` then `*b` and also defines `y` itself.
    const yaml =
        \\base: &b
        \\  x: 1
        \\  y: 2
        \\over: &o
        \\  x: 9
        \\d:
        \\  <<: [*o, *b]
        \\  y: 3
        \\
    ;
    const gpa = testing.allocator;
    const parsed = try Parser.parse(gpa, yaml, .v1_2_2);
    defer parsed.deinit(gpa);
    var arena_state = std.heap.ArenaAllocator.init(gpa);
    defer arena_state.deinit();
    const flat = try materialize(arena_state.allocator(), &parsed.ast, .strict);

    // The host's own `y` beats both merge sources.
    const d_y = try flat.getValByPath(&.{ .{ .key = "d" }, .{ .key = "y" } });
    try testing.expectEqualSlices(u8, "3", d_y.kind.number.raw);

    // `x` comes from `*o`, the earlier source, not from `*b`.
    const d_x = try flat.getValByPath(&.{ .{ .key = "d" }, .{ .key = "x" } });
    try testing.expectEqualSlices(u8, "9", d_x.kind.number.raw);

    // No `<<` entry is left behind in the flattened mapping.
    try testing.expectError(error.NotFound, flat.getValByPath(&.{ .{ .key = "d" }, .{ .key = "<<" } }));
}

test "materialize: core tags applied then dropped" {
    const gpa = testing.allocator;
    const parsed = try Parser.parse(gpa, "a: !!str 123\nb: !!int \"42\"\nc: !!null x\n", .v1_2_2);
    defer parsed.deinit(gpa);
    var arena_state = std.heap.ArenaAllocator.init(gpa);
    defer arena_state.deinit();
    const flat = try materialize(arena_state.allocator(), &parsed.ast, .strict);

    // `!!str` turns the number into a string with the same text.
    const a_node = try flat.getValByPath(&.{.{ .key = "a" }});
    try testing.expectEqualSlices(u8, "123", a_node.kind.string);

    // `!!int` turns the quoted string into an integer number.
    const b_node = try flat.getValByPath(&.{.{ .key = "b" }});
    try testing.expect(b_node.kind == .number);
    try testing.expect(b_node.kind.number.kind == .integer);

    // `!!null` forces null regardless of the scalar's text.
    const c_node = try flat.getValByPath(&.{.{ .key = "c" }});
    try testing.expect(c_node.kind == .null_);
}

test "materialize: unknown tag strict errors, lax drops" {
    const gpa = testing.allocator;
    const parsed = try Parser.parse(gpa, "x: !custom 1\n", .v1_2_2);
    defer parsed.deinit(gpa);
    var arena_state = std.heap.ArenaAllocator.init(gpa);
    defer arena_state.deinit();

    // Strict mode refuses the custom tag outright.
    const strict_result = materialize(arena_state.allocator(), &parsed.ast, .strict);
    try testing.expectError(error.UnknownTag, strict_result);

    // Lax mode drops the tag and keeps the value as parsed.
    const flat = try materialize(arena_state.allocator(), &parsed.ast, .lax);
    const x_node = try flat.getValByPath(&.{.{ .key = "x" }});
    try testing.expectEqualSlices(u8, "1", x_node.kind.number.raw);
}

test "materialize: collection-tag mismatch and cyclic alias error" {
    const gpa = testing.allocator;

    // A scalar core tag on a sequence is a type mismatch.
    {
        const parsed = try Parser.parse(gpa, "x: !!str [1, 2]\n", .v1_2_2);
        defer parsed.deinit(gpa);
        var arena_state = std.heap.ArenaAllocator.init(gpa);
        defer arena_state.deinit();
        const outcome = materialize(arena_state.allocator(), &parsed.ast, .strict);
        try testing.expectError(error.TagTypeMismatch, outcome);
    }

    // An anchor that contains an alias to itself can never finish expanding.
    {
        const parsed = try Parser.parse(gpa, "- &a [*a]\n", .v1_2_2);
        defer parsed.deinit(gpa);
        var arena_state = std.heap.ArenaAllocator.init(gpa);
        defer arena_state.deinit();
        const outcome = materialize(arena_state.allocator(), &parsed.ast, .strict);
        try testing.expectError(error.AliasCycle, outcome);
    }
}