fig 1.0.0

Parse, edit, and convert config files while preserving comments. Supports JSON, YAML, TOML, and more.
//! AST = Abstract Syntax Tree
//!
//! This data structure represents an abstract form of a document.
//! Two documents of different formats can theoretically have the same AST.
//!
//! The AST only uses its allocator when conversion is necessary
//! to represent a file's data in a normalized, format-independent form.
//! (For example, when storing decoded escape codes.)
//!
//! This file owns the struct itself — its fields, the node model, the comment
//! layer, and equality. The rest of the surface is split across sibling files
//! for readability and re-exported below as `AST` methods (see "Sub-modules").

const AST = @This();
const std = @import("std");
const activeTag = std.meta.activeTag;
const Allocator = std.mem.Allocator;
const util = @import("../util/util.zig");

// ── Sub-modules ─────────────────────────────────────────────────────────────
// The AST's behavior is split across sibling files, each operating on a
// `*const AST`/`*AST`. Zig 0.16 has no `usingnamespace`, so every method defined
// there is surfaced here as a decl alias: `ast.foo()` resolves `foo` on this
// type, and the aliased function receives the AST as its first parameter. The
// `Node`/`Comment`/etc. types those files lean on are declared in THIS file, so
// the imports below are mutually recursive — fine, since decl access is lazy.
const reader = @import("reader.zig");
const serializer = @import("serialize_options.zig");

// Builder — programmatic, bottom-up construction (the write path). Its file is
// itself the `Builder` struct, so the import IS the type.
pub const Builder = @import("builder.zig");

// Navigation + opt-in YAML reference-layer resolution (the read path).
pub const PathSegment = reader.PathSegment;
pub const ResolveError = reader.ResolveError;
pub const child = reader.child;
pub const next = reader.next;
pub const lastChild = reader.lastChild;
pub const firstChildKey = reader.firstChildKey;
pub const getNodeByPath = reader.getNodeByPath;
pub const getKeyByPath = reader.getKeyByPath;
pub const getValByPath = reader.getValByPath;
pub const resolveAlias = reader.resolveAlias;
pub const resolveDeep = reader.resolveDeep;
pub const mergedChild = reader.mergedChild;

// Serialization — render the AST to each supported format (the output path).
pub const SerializeFormat = serializer.SerializeFormat;
pub const SerializeOptions = serializer.SerializeOptions;
pub const SerializeError = serializer.SerializeError;
pub const serialize = serializer.serialize;
pub const serializeWith = serializer.serializeWith;
pub const serializeNode = serializer.serializeNode;
pub const serializeNodeWith = serializer.serializeNodeWith;

pub fn deinit(self: *AST) void {
    for (self.owned_strings) |string| {
        self.allocator.free(string);
    }
    self.allocator.free(self.owned_strings);
    self.allocator.free(self.nodes);
    // Free only the OUTER slices of the YAML reference-layer side-tables. Their
    // inner strings (tag text, anchor names) alias `self.source` or already live
    // in `owned_strings`; freeing them here would double-free.
    self.allocator.free(self.node_tags);
    self.allocator.free(self.node_anchors);
    self.allocator.free(self.anchors);
    // Free each entry's `leading` slice (its own allocation) then the outer
    // table. Comment *text* aliases `self.source` or lives in `owned_strings`
    // (already freed above), so it is not freed here — same discipline as the
    // reference-layer tables.
    for (self.node_comments) |nc| {
        self.allocator.free(nc.leading);
        self.allocator.free(nc.dangling);
    }
    self.allocator.free(self.node_comments);
}

allocator: Allocator,
owned_strings: []const []const u8 = &.{},

/// Points to first sequence/mapping that contains all other nodes.
root: Node.Id,

/// Complete node tree, such that `ast.nodes[node.id] == node`
nodes: []const Node,

// ── YAML reference/annotation layer (side-tables) ──────────────────────────
// These hold the DECODED data the resolver/materializer/printers need. Printers
// take `*const AST` (never `Document`), so the decoded names/strings must live
// here; only the source *spans* of the `&name`/`!tag` tokens live on `Document`.
// All three are empty (`&.{}`) for documents with no anchors/aliases/tags, and
// for non-YAML formats.

/// Indexed by node id: the decoded tag string attached to that node (e.g.
/// `"!!str"`, `"!foo"`), or null. Empty when the document declares no tags.
node_tags: []const ?[]const u8 = &.{},

/// Indexed by node id: the anchor NAME defined on that node (no leading `&`),
/// or null. Empty when the document declares no anchors.
node_anchors: []const ?[]const u8 = &.{},

/// Anchor definitions in source/id order (redefinition allowed → duplicate
/// names permitted). Consulted by `resolveAlias`/`mergedChild`. Empty when the
/// document declares no anchors.
anchors: []const Anchor = &.{},

// ── Comment layer (side-table) ─────────────────────────────────────────────
// Comments are trivia: they ride alongside a value but are NOT part of it, so
// (like the reference layer above) they live in a node-id-indexed side-table
// and are excluded from `eql`. The reason to carry them in the AST at all is
// cross-format conversion — a comment captured from YAML can be re-emitted as a
// JSON5 `//`/`/* */`. The source marker is therefore NOT stored: only the
// marker-stripped content and whether it was a line or block comment (a hint
// each printer honors or downgrades). Empty (`&.{}`) for comment-free documents.

/// Indexed by node id: the comments bound to that node. Empty when the document
/// carries no comments. Read via `comments(id)`, which guards the table length.
node_comments: []const NodeComments = &.{},

pub const Anchor = struct { name: []const u8, node: Node.Id };

pub const Comment = struct {
    /// Marker-stripped content. A `block` comment may contain newlines; a `line`
    /// comment never does.
    text: []const u8,
    /// The author's original form — a hint. A printer whose target format lacks
    /// the requested form downgrades (a `block` rendered to YAML/TOML becomes a
    /// run of `#` line comments).
    style: Style = .line,

    pub const Style = enum { line, block };

    pub fn eql(self: Comment, other: Comment) bool {
        return self.style == other.style and util.eql(u8, self.text, other.text);
    }
};

/// Comments bound to one node: a run of own-line comments above it (in source
/// order), at most one same-line trailing comment, and — on a container node —
/// a run of `dangling` comments that sit at the END of its body with no child
/// after them (an orphan in an empty `[]`, or comments before the closing brace /
/// at end of document). `dangling` is empty for non-containers.
pub const NodeComments = struct {
    leading: []const Comment = &.{},
    trailing: ?Comment = null,
    dangling: []const Comment = &.{},

    pub fn isEmpty(self: NodeComments) bool {
        return self.leading.len == 0 and self.trailing == null and self.dangling.len == 0;
    }

    pub fn eql(self: NodeComments, other: NodeComments) bool {
        if (self.leading.len != other.leading.len) return false;
        for (self.leading, other.leading) |a, b| if (!a.eql(b)) return false;
        if (self.dangling.len != other.dangling.len) return false;
        for (self.dangling, other.dangling) |a, b| if (!a.eql(b)) return false;
        if ((self.trailing == null) != (other.trailing == null)) return false;
        if (self.trailing) |t| if (!t.eql(other.trailing.?)) return false;
        return true;
    }
};

/// Read the comments bound to `id`, tolerating a short or absent `node_comments`
/// table (returns the empty value for ids past its end).
pub fn comments(self: *const AST, id: Node.Id) NodeComments {
    return if (id < self.node_comments.len) self.node_comments[id] else .{};
}

/// The node a container child's *leading* comment binds to: the key of a
/// `keyvalue` entry (so the comment renders above the key), else the child node
/// itself (a sequence element). Shared by every printer that emits comments.
pub fn leadingCommentAnchor(self: *const AST, id: Node.Id) Node.Id {
    return switch (self.nodes[id].kind) {
        .keyvalue => |kv| kv.key,
        else => id,
    };
}

/// The node a container child's *trailing* comment binds to: the value of a
/// `keyvalue` entry (so the comment renders after the value), else the child
/// node itself.
pub fn trailingCommentAnchor(self: *const AST, id: Node.Id) Node.Id {
    return switch (self.nodes[id].kind) {
        .keyvalue => |kv| kv.value,
        else => id,
    };
}

/// Compare two ASTs' comment layers. Separate from `eql` (which ignores
/// comments) so round-trip tests can assert comments survived.
pub fn commentsEql(self: AST, b: AST) bool {
    const n = @max(self.node_comments.len, b.node_comments.len);
    var i: usize = 0;
    while (i < n) : (i += 1) {
        const x: NodeComments = if (i < self.node_comments.len) self.node_comments[i] else .{};
        const y: NodeComments = if (i < b.node_comments.len) b.node_comments[i] else .{};
        if (!x.eql(y)) return false;
    }
    return true;
}

/// Represents a unit of data in an AST.
pub const Node = struct {
    /// Unique number identifier for this node.
    id: Id,
    kind: Kind,
    /// Indicates "next" value when inside a sequence/mapping.
    next_sibling: ?Id = null,

    pub const Id = u32;
    pub const Kind = union(enum) {
        null_,
        boolean: bool,
        string: []const u8,
        number: Number,
        /// A leaf scalar a source format gives special lexical meaning that the
        /// abstract model has no first-class variant for: TOML datetimes, ZON
        /// enum/char literals, etc. The `kind` tag records which, and `text` is
        /// the value's intrinsic bytes (kept verbatim, like `number.raw` — the
        /// payload IS the value, so it lives inline, not in a side-table). Adding
        /// a new such scalar is a new `ExtKind`, not a new union arm: the outer
        /// switches stay closed; only the printers (where cross-format rendering
        /// is inherently type-specific) gain an `ExtKind` case.
        extended: Extended,
        sequence: ?Id,
        mapping: ?Id,
        keyvalue: struct { key: Id, value: Id },
        /// A `*name` alias: a reference to an anchored node. The payload is the
        /// alias name (no leading `*`); the target is resolved on demand via
        /// `resolveAlias` (kept out of the node so equal documents compare equal
        /// regardless of whether resolution has run). A non-YAML printer never
        /// sees this — `materialize` expands aliases to copied subtrees first.
        alias: []const u8,
        pub const Number = struct {
            raw: []const u8,
            kind: enum { integer, float },
            pub fn eql(self: Number, other: Number) bool {
                return self.kind == other.kind and util.eql(u8, self.raw, other.raw);
            }
        };
        pub const Extended = struct {
            kind: ExtKind,
            text: []const u8,
            /// Which special scalar this is. The first four are RFC-3339-derived
            /// TOML datetimes (`text` is the raw timestamp). `enum_literal`'s
            /// `text` is the decoded name without the leading `.`; `char_literal`'s
            /// `text` is the decimal Unicode codepoint (so non-ZON printers can
            /// treat it as a number, while the ZON printer re-encodes `'A'`).
            pub const ExtKind = enum {
                offset_datetime,
                local_datetime,
                local_date,
                local_time,
                enum_literal,
                char_literal,
                /// A non-finite JSON5 number (`Infinity`, `-Infinity`, `NaN`).
                /// `text` is the source lexeme verbatim, sign included. No JSON
                /// number can hold these, so they ride in an extended scalar.
                number_special,
            };
            pub fn eql(self: Extended, other: Extended) bool {
                return self.kind == other.kind and util.eql(u8, self.text, other.text);
            }
        };
        pub fn eql(self: Kind, other: Kind) bool {
            if (activeTag(self) != activeTag(other)) return false;
            return switch (self) {
                .null_ => true,
                .boolean => |value| value == other.boolean,
                .string => |value| util.eql(u8, value, other.string),
                .number => |value| value.eql(other.number),
                .extended => |value| value.eql(other.extended),
                .sequence => |value| value == other.sequence,
                .mapping => |value| value == other.mapping,
                .keyvalue => |value| value.key == other.keyvalue.key and value.value == other.keyvalue.value,
                .alias => |value| util.eql(u8, value, other.alias),
            };
        }
    };

    pub fn eql(self: Node, other: Node) bool {
        if (self.id != other.id) return false;
        if (self.next_sibling != other.next_sibling) return false;
        if (!self.kind.eql(other.kind)) return false;
        return true;
    }
};

/// Function to tell if two documents are equal abstractly (does not compare source text).
pub fn eql(self: AST, b: AST) bool {
    return self.root == b.root and util.eql(Node, self.nodes, b.nodes);
}

test {
    // Pull the split-out surface into the test binary so each sibling's `test {}`
    // blocks run (only `builder.zig` carries tests today, but keep all three
    // referenced so future tests there are picked up automatically).
    _ = Builder;
    _ = reader;
    _ = serializer;
}