fig 1.0.0 - Docs.rs

//! The native "fig" parser: text → AST, the exact inverse of `native/printer.zig`.
//!
//! Recursive-descent over the byte stream (the grammar is small enough that the
//! token framework the format parsers share would be overkill). It reconstructs
//! every `Node.Kind` arm and the YAML reference layer (anchors `&name`, tags
//! `!tag`, aliases `*name`) into the same side-tables the format parsers
//! populate, so `print` ∘ `parse` is the identity on any AST.
//!
//! Strings, numbers, anchor names and tag text borrow `input` where possible
//! (only escaped strings allocate, landing in the AST's `owned_strings`), so the
//! returned AST is valid only while `input` outlives it — same contract as the
//! JSON parser.

const Parser = @This();

const std = @import("std");
const AST = @import("../ast/ast.zig");
const Document = @import("../document.zig");
const Span = @import("../util/span.zig");
const Printer = @import("printer.zig");

const ExtKind = AST.Node.Kind.Extended.ExtKind;

/// Allocator backing every list below and the slices moved into the result.
allocator: std.mem.Allocator,
/// The input text. Unescaped strings, number lexemes, names, tags and comment
/// text are all slices of it.
src: []const u8,
/// Byte offset of the next unread character in `src`.
pos: usize = 0,
/// Current container-nesting depth, bounded by `Printer.max_depth` so a
/// pathologically nested input can't overflow the recursive descent's stack.
depth: usize = 0,

// `nodes` and `spans` receive one entry per node, appended together by
// `addNode`, so a node id indexes both.
nodes: std.ArrayList(AST.Node) = .empty,
spans: std.ArrayList(Span) = .empty,
// Decoded (escaped) strings allocated by `decodeEscapes`.
owned_strings: std.ArrayList([]const u8) = .empty,
// Reference layer, grown in lockstep with `nodes` (a null per node) and patched
// when a prefix is seen. Only materialized into the AST when `ref_seen`.
node_anchors: std.ArrayList(?[]const u8) = .empty,
node_tags: std.ArrayList(?[]const u8) = .empty,
anchors: std.ArrayList(AST.Anchor) = .empty,
ref_seen: bool = false,
// Comment layer, also grown in lockstep with `nodes`. `pending_leading` buffers
// own-line comments seen while skipping trivia until the next node claims them;
// trailing comments are set directly. Materialized only when `comments_seen`.
// Comment text borrows `src` (comments never contain escapes), so the only
// allocations here are the per-node `leading` and `dangling` slices filled by
// `claimLeading`, `claimDangling` and `appendDangling`.
node_comments: std.ArrayList(AST.NodeComments) = .empty,
pending_leading: std.ArrayList(AST.Comment) = .empty,
comments_seen: bool = false,

/// Grammar-level failures the parser reports.
pub const ParseError = error{
    /// A byte that cannot start or continue the construct being parsed.
    UnexpectedToken,
    /// The input ended where a node or a `~` kind marker was required.
    UnexpectedEnd,
    /// A `"` string (or a trailing backslash escape) ran to the end of input.
    UnclosedString,
    /// The input ended after a sequence element, before the closing `]`.
    UnclosedArray,
    /// The input ended after a mapping entry, before the closing `}`.
    UnclosedObject,
    /// A `/*` comment has no closing `*/`.
    UnterminatedComment,
    /// A mapping key was not followed by `:`.
    ExpectedColon,
    /// `@kind` names no `ExtKind`, or is not followed by a string.
    InvalidExtended,
    /// A backslash escape other than the supported set.
    InvalidEscape,
    /// A `\u` escape that is too short, not hex, or not encodable as UTF-8.
    InvalidUnicodeEscape,
    /// `&` or `*` with no name characters after it.
    EmptyName,
    /// Non-trivia input remained after the root node.
    TrailingGarbage,
    /// Containers nested deeper than `Printer.max_depth`.
    NestingTooDeep,
};
/// Everything a parse can fail with: grammar errors plus allocation failure.
pub const ParserError = ParseError || std.mem.Allocator.Error;

/// Parse `input` and return only the `AST`, dropping the source spans.
/// The caller owns the result and releases it with `ast.deinit()`.
pub fn parseAbstract(allocator: std.mem.Allocator, input: []const u8) ParserError!AST {
    const document = try parse(allocator, input);
    // The span table is the one part of the `Document` this function does not
    // hand back, so release it on the way out.
    defer allocator.free(document.node_spans);
    return document.ast;
}

/// Parse `input` into a `Document`: the AST together with one source span per
/// node. Release it with `doc.deinit(allocator)`.
pub fn parse(allocator: std.mem.Allocator, input: []const u8) ParserError!Document {
    var state = Parser{ .allocator = allocator, .src = input };
    // Whatever `parseOnce` did not move into the result is released here, on
    // success and on failure alike.
    defer state.deinit();
    const document = try state.parseOnce();
    return document;
}

/// Release everything the parser still owns. After a successful `parseOnce`
/// the lists have been moved into the result and are empty; on an error path
/// this frees whatever had been built so far.
pub fn deinit(self: *Parser) void {
    const gpa = self.allocator;
    self.nodes.deinit(gpa);
    self.spans.deinit(gpa);
    for (self.owned_strings.items) |s| gpa.free(s);
    self.owned_strings.deinit(gpa);
    self.node_anchors.deinit(gpa);
    self.node_tags.deinit(gpa);
    self.anchors.deinit(gpa);
    // Each entry may own two allocated runs: `leading` (from `claimLeading`)
    // and `dangling` (from `claimDangling`/`appendDangling`). Both must go, or
    // a parse that fails after claiming orphan comments leaks them. Freeing a
    // zero-length slice is a no-op, so untouched entries are fine. Comment
    // text borrows `src`, so it is not freed.
    for (self.node_comments.items) |nc| {
        gpa.free(nc.leading);
        gpa.free(nc.dangling);
    }
    self.node_comments.deinit(gpa);
    self.pending_leading.deinit(gpa);
}

/// Parse the whole input and move the accumulated tables into a `Document`.
///
/// Fails with `error.TrailingGarbage` when anything but whitespace and
/// comments follows the root node. On any failure the parser keeps (or this
/// function frees) everything allocated so far, so `deinit` leaves no leak.
fn parseOnce(self: *Parser) ParserError!Document {
    const gpa = self.allocator;

    const root = try self.parseNode();
    try self.claimLeading(root);

    // A comment on the root's last line. As in `finishElement`, a comment after
    // a multi-line container's close joins its `dangling` run: the container's
    // `trailing` slot may already hold the head comment from its opening line
    // and must not be overwritten.
    self.skipInline();
    const tail_pos = self.pos;
    if (try self.tryComment()) |c| {
        if (self.multilineContainer(root, tail_pos)) {
            try self.appendDangling(root, c);
        } else {
            self.setTrailing(root, c);
        }
    }

    // End-of-file orphan comments extend the root's dangling run. They are
    // appended one by one so a run the root already owns (orphans before its
    // own close, or the tail comment above) is kept rather than replaced.
    try self.collectLeading();
    for (self.pending_leading.items) |c| try self.appendDangling(root, c);
    self.pending_leading.clearRetainingCapacity();
    if (self.peek() != null) return error.TrailingGarbage;

    const nodes = try self.nodes.toOwnedSlice(gpa);
    self.nodes = .empty;
    errdefer gpa.free(nodes);

    const spans = try self.spans.toOwnedSlice(gpa);
    self.spans = .empty;
    errdefer gpa.free(spans);

    const owned_strings = try self.owned_strings.toOwnedSlice(gpa);
    self.owned_strings = .empty;
    // The parser's list is empty from here on, so a later failure has to free
    // the decoded strings themselves as well as the slice holding them.
    errdefer {
        for (owned_strings) |s| gpa.free(s);
        gpa.free(owned_strings);
    }

    var ast: AST = .{
        .allocator = gpa,
        .owned_strings = owned_strings,
        .root = root,
        .nodes = nodes,
    };
    // Reference tables already moved into `ast` are lost to `deinit`, so free
    // them here if a later step fails. Tables still at their empty default
    // are zero-length, which `free` ignores.
    errdefer {
        gpa.free(ast.node_anchors);
        gpa.free(ast.node_tags);
        gpa.free(ast.anchors);
    }

    // Only documents that actually used the reference layer carry the
    // side-tables; everything else leaves the AST's `&.{}` defaults.
    if (self.ref_seen) {
        ast.node_anchors = try self.node_anchors.toOwnedSlice(gpa);
        self.node_anchors = .empty;
        ast.node_tags = try self.node_tags.toOwnedSlice(gpa);
        self.node_tags = .empty;
    }
    if (self.anchors.items.len > 0) {
        // `resolveAlias` requires anchors sorted by node id (it walks until it
        // passes the alias). Inner anchors finish before outer ones, so sort.
        std.mem.sort(AST.Anchor, self.anchors.items, {}, anchorLess);
        ast.anchors = try self.anchors.toOwnedSlice(gpa);
        self.anchors = .empty;
    }
    // Materialized last so no fallible step follows it: hands the owned comment
    // slices to the AST in one move. If this step itself fails, the list stays
    // with the parser and `deinit` releases it. Done only when comments were
    // actually seen, leaving the AST's `&.{}` default otherwise.
    if (self.comments_seen) {
        ast.node_comments = try self.node_comments.toOwnedSlice(gpa);
        self.node_comments = .empty;
    }

    return .{ .source = self.src, .ast = ast, .node_spans = spans };
}

/// Ordering predicate for `std.mem.sort`: anchors sort by ascending node id.
fn anchorLess(_: void, a: AST.Anchor, b: AST.Anchor) bool {
    return b.node > a.node;
}

// ── node construction ───────────────────────────────────────────────────────

/// Append a node of `kind` and return its id (its index in `nodes`).
///
/// The span runs from `start` to the current `pos`; containers patch the end
/// once they close. Every side-table gets a blank entry so all of them stay
/// indexable by node id.
fn addNode(self: *Parser, kind: AST.Node.Kind, start: usize) ParserError!AST.Node.Id {
    const gpa = self.allocator;
    const id: AST.Node.Id = @intCast(self.nodes.items.len);
    const node: AST.Node = .{ .id = id, .kind = kind, .next_sibling = null };
    const span: Span = .{ .start = start, .end = self.pos };

    try self.nodes.append(gpa, node);
    try self.spans.append(gpa, span);
    try self.node_anchors.append(gpa, null);
    try self.node_tags.append(gpa, null);
    try self.node_comments.append(gpa, .{});
    return id;
}

// ── grammar ─────────────────────────────────────────────────────────────────

/// A node is any reference-layer prefixes (`&anchor`, `!tag`) followed by a
/// value. Comments found in front of the node are left in `pending_leading`
/// on return; the enclosing container (or the document root) binds them with
/// `claimLeading` once it has the node's id.
///
/// While the value itself is parsed those comments are set aside. Otherwise a
/// container value would leave them buffered during its own body, and its
/// first child (or, for an empty container, its `dangling` run) would claim
/// comments that were written in front of the container.
///
/// Errors: `UnexpectedEnd` at end of input, `UnexpectedToken` for a repeated
/// `&`/`!` prefix, plus whatever the value parser reports.
fn parseNode(self: *Parser) ParserError!AST.Node.Id {
    try self.collectLeading();

    // Move this node's own leading comments out of the shared buffer.
    var own_leading = self.pending_leading;
    self.pending_leading = .empty;
    errdefer own_leading.deinit(self.allocator);

    var anchor: ?[]const u8 = null;
    var tag: ?[]const u8 = null;
    while (true) {
        switch (self.peek() orelse return error.UnexpectedEnd) {
            '&' => {
                if (anchor != null) return error.UnexpectedToken;
                anchor = try self.parseAnchorName();
                self.skipWs();
            },
            '!' => {
                if (tag != null) return error.UnexpectedToken;
                tag = self.parseTag();
                self.skipWs();
            },
            else => break,
        }
    }

    const id = try self.parseValue();

    if (anchor) |name| {
        self.ref_seen = true;
        self.node_anchors.items[id] = name;
        try self.anchors.append(self.allocator, .{ .name = name, .node = id });
    }
    if (tag) |text| {
        self.ref_seen = true;
        self.node_tags.items[id] = text;
    }

    // Put the node's own comments back for the caller's `claimLeading`.
    // Containers claim everything they buffer before returning, so the shared
    // buffer is normally empty here; anything that is left follows in order.
    try own_leading.appendSlice(self.allocator, self.pending_leading.items);
    self.pending_leading.deinit(self.allocator);
    self.pending_leading = own_leading;
    return id;
}

/// Dispatch on the first byte of a value to the matching sub-parser.
/// Returns `UnexpectedEnd` at end of input and `UnexpectedToken` for a byte
/// that starts no value.
fn parseValue(self: *Parser) ParserError!AST.Node.Id {
    const lead = self.peek() orelse return error.UnexpectedEnd;
    switch (lead) {
        '"' => {
            // Plain strings are the only value built right here.
            const begin = self.pos;
            const text = try self.parseStringValue();
            return self.addNode(.{ .string = text }, begin);
        },
        '{' => return self.parseMapping(),
        '[' => return self.parseSequence(),
        '@' => return self.parseExtended(),
        '*' => return self.parseAlias(),
        '~', '+', '-', '.', '0'...'9' => return self.parseNumber(),
        '_', 'a'...'z', 'A'...'Z' => return self.parseBareword(),
        else => return error.UnexpectedToken,
    }
}

/// Parse one of the keywords `null`, `true` or `false`. Any other run of name
/// characters is `UnexpectedToken`.
fn parseBareword(self: *Parser) ParserError!AST.Node.Id {
    const begin = self.pos;
    while (self.pos < self.src.len and isNameChar(self.src[self.pos])) self.pos += 1;
    const word = self.src[begin..self.pos];

    if (std.mem.eql(u8, word, "null")) return self.addNode(.null_, begin);
    if (std.mem.eql(u8, word, "true")) return self.addNode(.{ .boolean = true }, begin);
    if (std.mem.eql(u8, word, "false")) return self.addNode(.{ .boolean = false }, begin);
    return error.UnexpectedToken;
}

/// Parse a number lexeme, optionally preceded by a `~f`/`~i` kind override.
///
/// The lexeme is stored verbatim (borrowing `src`). Without an override the
/// kind is whatever `Printer.impliedNumberKind` reads from the lexeme. The
/// node's span includes the override marker.
fn parseNumber(self: *Parser) ParserError!AST.Node.Id {
    const begin = self.pos;

    // `~f` forces float, `~i` forces integer; null means "no override".
    var forced_float: ?bool = null;
    if (self.peek() == '~') {
        self.pos += 1;
        const marker = self.peek() orelse return error.UnexpectedEnd;
        forced_float = switch (marker) {
            'f' => true,
            'i' => false,
            else => return error.UnexpectedToken,
        };
        self.pos += 1;
    }

    const lexeme_begin = self.pos;
    while (self.pos < self.src.len and isNumberChar(self.src[self.pos])) self.pos += 1;
    const lexeme = self.src[lexeme_begin..self.pos];
    if (lexeme.len == 0) return error.UnexpectedToken;

    const NumberKind = @TypeOf(Printer.impliedNumberKind(lexeme));
    const kind: NumberKind = blk: {
        const want_float = forced_float orelse break :blk Printer.impliedNumberKind(lexeme);
        break :blk if (want_float) .float else .integer;
    };
    return self.addNode(.{ .number = .{ .raw = lexeme, .kind = kind } }, begin);
}

/// Parse an extended scalar: `@kind "text"`, where `kind` names an `ExtKind`.
/// An unknown kind or a missing string is `InvalidExtended`.
fn parseExtended(self: *Parser) ParserError!AST.Node.Id {
    const begin = self.pos;
    self.pos += 1; // step over '@'

    const name_begin = self.pos;
    while (self.peek()) |c| {
        switch (c) {
            'a'...'z', '_' => self.pos += 1,
            else => break,
        }
    }
    const ext_kind = std.meta.stringToEnum(ExtKind, self.src[name_begin..self.pos]) orelse
        return error.InvalidExtended;

    self.skipWs();
    const at_quote = if (self.peek()) |c| c == '"' else false;
    if (!at_quote) return error.InvalidExtended;

    const payload = try self.parseStringValue();
    return self.addNode(.{ .extended = .{ .kind = ext_kind, .text = payload } }, begin);
}

/// Parse an alias `*name`. The name borrows `src`; an empty one is `EmptyName`.
fn parseAlias(self: *Parser) ParserError!AST.Node.Id {
    const begin = self.pos;
    const name_begin = begin + 1; // just past the '*'
    self.pos = name_begin;
    while (self.pos < self.src.len and isNameChar(self.src[self.pos])) self.pos += 1;

    if (self.pos == name_begin) return error.EmptyName;
    return self.addNode(.{ .alias = self.src[name_begin..self.pos] }, begin);
}

/// Parse a `[ … ]` sequence; `pos` is at the `[` on entry.
///
/// The container node is added before its children and its `kind` is patched
/// with the first child once that is known; children are chained through
/// `next_sibling`. Fails with `NestingTooDeep` past `Printer.max_depth`, and
/// with `UnexpectedEnd`/`UnclosedArray` when the input ends inside the
/// brackets.
fn parseSequence(self: *Parser) ParserError!AST.Node.Id {
    const start = self.pos;
    self.pos += 1; // '['
    self.depth += 1;
    // The `defer` is registered after the check, so the error return leaves
    // `depth` incremented; the error propagates out and the parse is abandoned.
    if (self.depth > Printer.max_depth) return error.NestingTooDeep;
    defer self.depth -= 1;
    const id = try self.addNode(.{ .sequence = null }, start);
    // A `//` comment on the opening line becomes the container's own trailing.
    try self.captureOpenTrailing(id);
    try self.collectLeading();
    if (self.peek() == ']') {
        self.pos += 1;
        try self.claimDangling(id); // orphan comments inside an empty `[ … ]`
        self.spans.items[id].end = self.pos;
        return id;
    }
    var first: ?AST.Node.Id = null;
    var prev: ?AST.Node.Id = null;
    while (true) {
        const child = try self.parseNode();
        // A sequence element is its own value node: leading and trailing
        // comments both bind directly to it.
        try self.claimLeading(child);
        // Link the child into the sibling chain (or remember it as the head).
        if (prev) |p| self.nodes.items[p].next_sibling = child else first = child;
        prev = child;
        // `finishElement` returns true once it has consumed the closing `]`.
        if (try self.finishElement(']', error.UnclosedArray, child)) {
            try self.claimDangling(id); // orphan comments before the `]`
            break;
        }
    }
    self.nodes.items[id].kind = .{ .sequence = first };
    // `addNode` recorded the span end at the `[`; extend it to the `]`.
    self.spans.items[id].end = self.pos;
    return id;
}

/// Parse a `{ … }` mapping; `pos` is at the `{` on entry.
///
/// Each entry is `key : value`, where both sides are full nodes (keys need not
/// be strings). An entry becomes a `keyvalue` node added after its key and
/// value; the entries are chained through `next_sibling` and the mapping's
/// `kind` is patched with the first entry. Fails with `NestingTooDeep` past
/// `Printer.max_depth`, `ExpectedColon` when a key is not followed by `:`, and
/// `UnexpectedEnd`/`UnclosedObject` when the input ends inside the braces.
fn parseMapping(self: *Parser) ParserError!AST.Node.Id {
    const start = self.pos;
    self.pos += 1; // '{'
    self.depth += 1;
    // The `defer` is registered after the check, so the error return leaves
    // `depth` incremented; the error propagates out and the parse is abandoned.
    if (self.depth > Printer.max_depth) return error.NestingTooDeep;
    defer self.depth -= 1;
    const id = try self.addNode(.{ .mapping = null }, start);
    // A `//` comment on the opening line becomes the container's own trailing.
    try self.captureOpenTrailing(id);
    try self.collectLeading();
    if (self.peek() == '}') {
        self.pos += 1;
        try self.claimDangling(id); // orphan comments inside an empty `{ … }`
        self.spans.items[id].end = self.pos;
        return id;
    }
    var first: ?AST.Node.Id = null;
    var prev: ?AST.Node.Id = null;
    while (true) {
        const key = try self.parseNode();
        // An entry's leading comment sits above the key; its trailing comment
        // follows the value. This mirrors the native printer's anchors.
        try self.claimLeading(key);
        self.skipWs();
        if (self.peek() != ':') return error.ExpectedColon;
        self.pos += 1;
        const value = try self.parseNode();
        // Normally empty; claims any (non-canonical) comment between `:` and the
        // value so it can't leak onto the next entry's key.
        try self.claimLeading(value);
        // The entry's span starts where the key's span starts and ends at the
        // current position, just past the value.
        const kv = try self.addNode(.{ .keyvalue = .{ .key = key, .value = value } }, self.spans.items[key].start);
        if (prev) |p| self.nodes.items[p].next_sibling = kv else first = kv;
        prev = kv;
        // `finishElement` returns true once it has consumed the closing `}`.
        if (try self.finishElement('}', error.UnclosedObject, value)) {
            try self.claimDangling(id); // orphan comments before the `}`
            break;
        }
    }
    self.nodes.items[id].kind = .{ .mapping = first };
    // `addNode` recorded the span end at the `{`; extend it to the `}`.
    self.spans.items[id].end = self.pos;
    return id;
}

// ── lexical helpers ─────────────────────────────────────────────────────────

/// Consume `&name` and return the name (a slice of `src`, `&` excluded).
/// An empty name is `EmptyName`.
fn parseAnchorName(self: *Parser) ParserError![]const u8 {
    const name_begin = self.pos + 1; // just past the '&'
    self.pos = name_begin;
    while (self.pos < self.src.len and isNameChar(self.src[self.pos])) self.pos += 1;

    if (self.pos == name_begin) return error.EmptyName;
    return self.src[name_begin..self.pos];
}

/// Consume a tag and return it verbatim, leading `!` included (e.g. `!!str`,
/// `!foo`), matching how the format parsers fill `node_tags`. The tag ends at
/// the first whitespace or structural delimiter.
fn parseTag(self: *Parser) []const u8 {
    const begin = self.pos;
    while (self.pos < self.src.len and isTagChar(self.src[self.pos])) self.pos += 1;
    return self.src[begin..self.pos];
}

/// Consume a `"…"` string (`pos` is at the opening quote) and return its
/// contents. A string without escapes is returned as a slice of `src`; one
/// with escapes is decoded into an owned copy by `decodeEscapes`.
/// A missing closing quote is `UnclosedString`.
fn parseStringValue(self: *Parser) ParserError![]const u8 {
    const body_begin = self.pos + 1; // just past the opening quote
    self.pos = body_begin;

    var cursor = body_begin;
    var saw_backslash = false;
    const close = while (cursor < self.src.len) {
        switch (self.src[cursor]) {
            '"' => break cursor,
            '\\' => {
                // Step over the escaped byte too, so an escaped quote does not
                // end the string; running past the end fails the loop test.
                saw_backslash = true;
                cursor += 2;
            },
            else => cursor += 1,
        }
    } else return error.UnclosedString;

    const body = self.src[body_begin..close];
    self.pos = close + 1; // just past the closing quote

    if (saw_backslash) return self.decodeEscapes(body);
    return body; // fast path: borrow from source
}

/// Decode the backslash escapes in `inner` (string contents without the
/// quotes) into a new buffer, register it in `owned_strings`, and return it.
///
/// Supported escapes: `\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t` and
/// `\uXXXX` with exactly four hex digits, emitted as UTF-8. Anything else is
/// `InvalidEscape`; a short, non-hex or unencodable `\u` is
/// `InvalidUnicodeEscape`; a backslash at the very end is `UnclosedString`.
fn decodeEscapes(self: *Parser, inner: []const u8) ParserError![]const u8 {
    var decoded: std.ArrayList(u8) = .empty;
    errdefer decoded.deinit(self.allocator);
    var i: usize = 0;
    while (i < inner.len) {
        const c = inner[i];
        if (c != '\\') {
            try decoded.append(self.allocator, c);
            i += 1;
            continue;
        }
        i += 1;
        if (i >= inner.len) return error.UnclosedString;
        switch (inner[i]) {
            '"' => try decoded.append(self.allocator, '"'),
            '\\' => try decoded.append(self.allocator, '\\'),
            '/' => try decoded.append(self.allocator, '/'),
            'b' => try decoded.append(self.allocator, 0x08),
            'f' => try decoded.append(self.allocator, 0x0c),
            'n' => try decoded.append(self.allocator, '\n'),
            'r' => try decoded.append(self.allocator, '\r'),
            't' => try decoded.append(self.allocator, '\t'),
            'u' => {
                // `i` is at the 'u'; the four digits are at i+1 .. i+4.
                if (i + 4 >= inner.len) return error.InvalidUnicodeEscape;
                const hex = inner[i + 1 .. i + 5];
                // `parseInt` also accepts a leading sign and `_` separators,
                // which are not valid in an escape, so check the digits first.
                for (hex) |h| {
                    if (!std.ascii.isHex(h)) return error.InvalidUnicodeEscape;
                }
                const unit = std.fmt.parseInt(u21, hex, 16) catch return error.InvalidUnicodeEscape;
                var buf: [4]u8 = undefined;
                const n = std.unicode.utf8Encode(unit, &buf) catch return error.InvalidUnicodeEscape;
                try decoded.appendSlice(self.allocator, buf[0..n]);
                i += 4;
            },
            else => return error.InvalidEscape,
        }
        i += 1;
    }
    const owned = try decoded.toOwnedSlice(self.allocator);
    // Until the string is registered, nothing else would free it.
    errdefer self.allocator.free(owned);
    try self.owned_strings.append(self.allocator, owned);
    return owned;
}

/// Advance past spaces, tabs, newlines and carriage returns. Comments are not
/// skipped.
fn skipWs(self: *Parser) void {
    while (self.pos < self.src.len) : (self.pos += 1) {
        switch (self.src[self.pos]) {
            ' ', '\t', '\n', '\r' => {},
            else => return,
        }
    }
}

/// Advance past spaces and tabs only, stopping at a newline or a comment.
/// This reaches a same-line trailing comment or a separator without crossing
/// into the next line's leading trivia.
fn skipInline(self: *Parser) void {
    while (self.peek()) |c| {
        switch (c) {
            ' ', '\t' => self.pos += 1,
            else => return,
        }
    }
}

/// Skip whitespace (newlines included) and comments, appending every comment
/// to `pending_leading` for a later claim. Returns at the next value byte.
///
/// `comments_seen` is deliberately not touched here: it is set only when a
/// comment is bound to a node, so comments that are buffered but never
/// claimed leave the document comment-free.
fn collectLeading(self: *Parser) ParserError!void {
    self.skipWs();
    while (try self.tryComment()) |comment| {
        try self.pending_leading.append(self.allocator, comment);
        self.skipWs();
    }
}

/// Consume one comment if `pos` is at `//` or `/*`, otherwise return null and
/// leave `pos` unchanged. The returned text is a slice of `src` with the
/// markers removed and surrounding whitespace trimmed, so nothing is
/// allocated. A `/*` with no closing `*/` is `UnterminatedComment`.
fn tryComment(self: *Parser) ParserError!?AST.Comment {
    const rest = self.src[self.pos..];

    if (std.mem.startsWith(u8, rest, "//")) {
        self.pos += 2;
        const body = self.src[self.pos..];
        // Runs to the newline (left unconsumed) or to the end of input.
        const line_len = std.mem.indexOfScalar(u8, body, '\n') orelse body.len;
        self.pos += line_len;
        return .{ .text = std.mem.trim(u8, body[0..line_len], " \t\r"), .style = .line };
    }

    if (std.mem.startsWith(u8, rest, "/*")) {
        self.pos += 2;
        const body = self.src[self.pos..];
        const body_len = std.mem.indexOf(u8, body, "*/") orelse return error.UnterminatedComment;
        self.pos += body_len + 2; // past the closing `*/`
        return .{ .text = std.mem.trim(u8, body[0..body_len], " \t\r\n"), .style = .block };
    }

    return null;
}

/// Move the buffered comments onto node `id` as its `leading` run; the comment
/// table owns the slice from then on. Does nothing when the buffer is empty.
fn claimLeading(self: *Parser, id: AST.Node.Id) ParserError!void {
    const buffered = self.pending_leading.items.len;
    if (buffered == 0) return;

    const run = try self.pending_leading.toOwnedSlice(self.allocator);
    self.pending_leading = .empty;

    const entry = &self.node_comments.items[id];
    entry.leading = run;
    self.comments_seen = true;
}

/// Record `c` as node `id`'s same-line trailing comment and mark the document
/// as carrying comments.
fn setTrailing(self: *Parser, id: AST.Node.Id, c: AST.Comment) void {
    const entry = &self.node_comments.items[id];
    entry.trailing = c;
    self.comments_seen = true;
}

/// After an opening delimiter, take a `//` comment on the same line
/// (`[ // c`) as container `id`'s own trailing comment, i.e. its head comment.
/// A `/*` block comment is left for `collectLeading`, because it may lead the
/// first child.
fn captureOpenTrailing(self: *Parser, id: AST.Node.Id) ParserError!void {
    self.skipInline();
    if (!std.mem.startsWith(u8, self.src[self.pos..], "//")) return;
    const head = try self.tryComment() orelse return;
    self.setTrailing(id, head);
}

/// True when `id` is a sequence or mapping and at least one newline lies
/// between its opening delimiter and `cpos`, i.e. a multi-line `[ … ]` or
/// `{ … }` whose close is on `cpos`'s line.
fn multilineContainer(self: *Parser, id: AST.Node.Id, cpos: usize) bool {
    const is_container = switch (self.nodes.items[id].kind) {
        .sequence, .mapping => true,
        else => false,
    };
    if (!is_container) return false;

    const open = self.spans.items[id].start;
    if (open >= cpos) return false;
    const between = self.src[open..cpos];
    return std.mem.indexOfScalar(u8, between, '\n') != null;
}

/// Add `c` to the end of `id`'s `dangling` run. The run is reallocated one
/// element larger, keeping any orphans already claimed at the close, and the
/// previous slice is freed.
fn appendDangling(self: *Parser, id: AST.Node.Id, c: AST.Comment) ParserError!void {
    const entry = &self.node_comments.items[id];
    const previous = entry.dangling;

    const extended = try self.allocator.alloc(AST.Comment, previous.len + 1);
    @memcpy(extended[0..previous.len], previous);
    extended[previous.len] = c;

    self.allocator.free(previous);
    entry.dangling = extended;
    self.comments_seen = true;
}

/// Hand buffered orphan comments (no node followed them) to container `id` as
/// its `dangling` run — they sit at the end of its body. No-op when nothing is
/// buffered.
///
/// If `id` already owns a dangling run, the buffered comments are appended to
/// it. This happens for the root, which claims once at its own close and once
/// more at end of file; replacing the slice there would leak the first run
/// and drop its comments.
fn claimDangling(self: *Parser, id: AST.Node.Id) ParserError!void {
    const pending = self.pending_leading.items;
    if (pending.len == 0) return;

    const entry = &self.node_comments.items[id];
    const existing = entry.dangling;
    if (existing.len == 0) {
        // Common case: the buffer itself becomes the run.
        const owned = try self.pending_leading.toOwnedSlice(self.allocator);
        self.pending_leading = .empty;
        entry.dangling = owned;
    } else {
        const merged = try self.allocator.alloc(AST.Comment, existing.len + pending.len);
        @memcpy(merged[0..existing.len], existing);
        @memcpy(merged[existing.len..], pending);
        self.allocator.free(existing);
        entry.dangling = merged;
        self.pending_leading.clearRetainingCapacity();
    }
    self.comments_seen = true;
}

/// After an element's value is parsed (and its leading claimed), consume an
/// optional same-line trailing comment plus the separator. Binds the trailing
/// comment to `trailing_target`. Returns true when the container closed (`close`
/// consumed), false when another element follows. The canonical native form
/// puts a trailing comment after the comma (`v, // c`); a last element carries
/// it directly (`v // c`).
///
/// `unclosed` is the error returned when the input ends before `close`.
/// Content that follows without a comma is `UnexpectedToken`; a comma directly
/// before `close` is accepted.
fn finishElement(self: *Parser, close: u8, unclosed: ParseError, trailing_target: AST.Node.Id) ParserError!bool {
    self.skipInline();
    var had_comma = false;
    if (self.peek() == ',') {
        self.pos += 1;
        had_comma = true;
        self.skipInline();
    }
    // Where the comment (if any) starts, for the multi-line check below.
    const cpos = self.pos;
    if (try self.tryComment()) |c| {
        // A comment after a multi-line container's close is a bottom comment →
        // its `dangling` run; a scalar or inline container keeps the trailing.
        if (self.multilineContainer(trailing_target, cpos)) {
            try self.appendDangling(trailing_target, c);
        } else {
            self.setTrailing(trailing_target, c);
        }
    }
    // Buffer any own-line comments up to the next element or the close.
    try self.collectLeading();
    const c = self.peek() orelse return unclosed;
    if (c == close) {
        self.pos += 1;
        // Any orphan comments before the close stay buffered; the caller claims
        // them as the container's `dangling` run.
        return true;
    }
    // More content is only legal across a comma separator.
    if (had_comma) return false;
    return error.UnexpectedToken;
}

/// The byte at `pos`, or null at end of input. Never advances.
fn peek(self: *const Parser) ?u8 {
    if (self.pos >= self.src.len) return null;
    return self.src[self.pos];
}

/// Whether `c` may appear in a bareword, anchor name or alias name:
/// an ASCII letter, a digit, `_` or `-`.
fn isNameChar(c: u8) bool {
    return switch (c) {
        'a'...'z', 'A'...'Z', '0'...'9', '_', '-' => true,
        else => false,
    };
}

/// Whether `c` may appear in a number lexeme: decimal and hex digits (which
/// also cover the exponent `e` and the binary prefix `b`), the radix letters
/// `x`/`o` in either case, `.`, the `_` digit separator, and the signs.
fn isNumberChar(c: u8) bool {
    return switch (c) {
        '0'...'9', 'a'...'f', 'A'...'F' => true,
        'x', 'X', 'o', 'O' => true,
        '.', '_', '+', '-' => true,
        else => false,
    };
}

/// Whether `c` may appear in a tag: any byte except whitespace, the
/// structural delimiters `, : { } [ ]`, and the double quote.
fn isTagChar(c: u8) bool {
    const terminators = " \t\n\r,:{}[]\"";
    return std.mem.indexOfScalar(u8, terminators, c) == null;
}

// =======
// Testing
// =======

const testing = std.testing;

/// Assert that `input` round-trips: parse → print → parse yields an equal AST,
/// and the re-printed text is byte-identical to the first print.
///
/// Note that `input` itself is never compared with the printed text; only the
/// two prints are compared with each other.
fn expectRoundTrip(input: []const u8) !void {
    // First pass: parse the caller's text and print it.
    var ast = try parseAbstract(testing.allocator, input);
    defer ast.deinit();

    var out1: std.Io.Writer.Allocating = .init(testing.allocator);
    defer out1.deinit();
    try Printer.print(&out1.writer, &ast);

    // Second pass: the printed text must parse back to the same tree.
    var reparsed = try parseAbstract(testing.allocator, out1.written());
    defer reparsed.deinit();
    try testing.expect(ast.eql(reparsed));
    // Comments are excluded from `eql`, so assert them separately.
    try testing.expect(ast.commentsEql(reparsed));

    // Printing the reparsed tree must reproduce the first print exactly.
    var out2: std.Io.Writer.Allocating = .init(testing.allocator);
    defer out2.deinit();
    try Printer.print(&out2.writer, &reparsed);
    try testing.expectEqualStrings(out1.written(), out2.written());
}

// Covers a leading line comment, a trailing comment after a comma, an own-line
// block comment, and a trailing comment on a last element.
test "round-trips comments: leading, trailing, line, block" {
    // Canonical form (what the printer emits), so the byte-identical reprint
    // check applies in full.
    try expectRoundTrip(
        \\{
        \\  // leading on first entry
        \\  "name": "fig", // trailing on a value
        \\  /* a block comment */
        \\  "port": 8080,
        \\  "nums": [
        \\    // leading on a sequence element
        \\    1,
        \\    2 // trailing on the last element
        \\  ]
        \\}
    );
}

// Checks which node each comment lands on, not just that the text survives.
test "captures comment attachment onto the right nodes" {
    var ast = try parseAbstract(testing.allocator,
        \\{
        \\  // c1
        \\  "a": 1, // c2
        \\  "b": [2 /* c3 */]
        \\}
    );
    defer ast.deinit();

    const root = ast.nodes[ast.root];
    // The mapping's payload is its first `keyvalue` entry.
    const kv_a = ast.nodes[root.kind.mapping.?].kind.keyvalue;
    // Leading binds to the key node, trailing to the value node.
    try testing.expectEqualStrings("c1", ast.comments(kv_a.key).leading[0].text);
    try testing.expect(ast.comments(kv_a.key).leading[0].style == .line);
    try testing.expectEqualStrings("c2", ast.comments(kv_a.value).trailing.?.text);

    // Second entry: follow `next_sibling`, then the sequence's first element.
    const kv_b = ast.nodes[ast.nodes[root.kind.mapping.?].next_sibling.?].kind.keyvalue;
    const elem = ast.nodes[ast.nodes[kv_b.value].kind.sequence.?];
    try testing.expectEqualStrings("c3", ast.comments(elem.id).trailing.?.text);
    try testing.expect(ast.comments(elem.id).trailing.?.style == .block);
}

// The comment table is only materialized once a comment has been bound.
test "comment-free document carries no comment table" {
    var ast = try parseAbstract(testing.allocator, "[1, 2, 3]");
    defer ast.deinit();
    try testing.expectEqual(@as(usize, 0), ast.node_comments.len);
}

// Comments with no node after them attach to the enclosing container.
test "orphan comments are captured as the container's dangling run" {
    try expectRoundTrip("[]"); // sanity: empty container still round-trips
    // An own-line comment at the bottom of a container binds as `dangling`.
    var ast = try parseAbstract(testing.allocator, "[\n  // orphan\n]");
    defer ast.deinit();
    try testing.expectEqualStrings("orphan", ast.comments(ast.root).dangling[0].text);
    // And it round-trips (printed inside the block-form container).
    try expectRoundTrip(
        \\{
        \\  "a": 1
        \\  // dangling at end
        \\}
    );
}

// Exercises the head comment on an opening line and a bottom comment before
// the close, both on nested sequences.
test "container comment: opening line is head (trailing), closing line is bottom (dangling)" {
    // `[ // head` rides the open line; `] // tail` normalizes to a bottom comment.
    try expectRoundTrip(
        \\{
        \\  "a": [ // head
        \\    1
        \\  ],
        \\  "b": [
        \\    2
        \\    // tail
        \\  ]
        \\}
    );
}

// One document covering strings, the number lexeme variants, null, booleans
// and nested containers.
test "round-trips scalars, containers, and node keys" {
    try expectRoundTrip(
        \\{
        \\  "name": "fig",
        \\  "port": 8080,
        \\  "ratio": 1.0,
        \\  "hex": 0xFF,
        \\  "grouped": 1_000,
        \\  "signed": +42,
        \\  "nums": [1, 2.5, .5, 5., 1e9],
        \\  "missing": null,
        \\  "flag": true,
        \\  "nested": { "a": [true, false] }
        \\}
    );
}

// `@kind "text"` values: each kind name must resolve to an `ExtKind`.
test "round-trips extended scalars" {
    try expectRoundTrip(
        \\{
        \\  "dt": @offset_datetime "1979-05-27T07:32:00Z",
        \\  "d": @local_date "1979-05-27",
        \\  "mode": @enum_literal "fast",
        \\  "ch": @char_literal "65",
        \\  "inf": @number_special "Infinity"
        \\}
    );
}

// The reference layer: an anchored mapping, an alias to it, and a tagged string.
test "round-trips anchors, aliases, and tags" {
    try expectRoundTrip(
        \\[&base { "retries": 3 }, *base, !!str "tagged"]
    );
}

// Decoding only: `\t`, `\"`, `\\` and a `\u` escape become their raw bytes.
test "string escapes decode and re-encode" {
    var ast = try parseAbstract(testing.allocator, "\"tab:\\t quote:\\\" backslash:\\\\ ctrl:\\u0007\"");
    defer ast.deinit();
    try testing.expectEqualStrings("tab:\t quote:\" backslash:\\ ctrl:\x07", ast.nodes[ast.root].kind.string);
}

// Mapping keys are full nodes, so a sequence is accepted as a key.
test "non-string mapping keys" {
    var ast = try parseAbstract(testing.allocator, "{ [1, 2]: \"tuple\" }");
    defer ast.deinit();
    const root = ast.nodes[ast.root];
    const kv = ast.nodes[root.kind.mapping.?].kind.keyvalue;
    try testing.expect(ast.nodes[kv.key].kind == .sequence);
}

// Starts from a hand-built AST, because no plain lexeme produces this
// mismatch: the printer must emit `~f` and the parser must honor it.
test "kind override survives a lexeme/kind mismatch" {
    // A float whose lexeme reads as an integer: only `~f` preserves it.
    var b = AST.Builder.init(testing.allocator);
    defer b.deinit();
    const root = try b.addNumberRaw("1", true); // raw "1", but kind=float
    var ast = try b.finish(root);
    defer ast.deinit();

    var out: std.Io.Writer.Allocating = .init(testing.allocator);
    defer out.deinit();
    try Printer.print(&out.writer, &ast);
    try testing.expectEqualStrings("~f1\n", out.written());

    var reparsed = try parseAbstract(testing.allocator, out.written());
    defer reparsed.deinit();
    try testing.expect(reparsed.nodes[reparsed.root].kind.number.kind == .float);
}

// The parsed anchor table must let `resolveAlias` find the anchored node.
test "alias resolves to its anchor" {
    var ast = try parseAbstract(testing.allocator, "[&a 1, *a]");
    defer ast.deinit();
    const seq = ast.nodes[ast.root];
    // The alias is the second element: the first child's `next_sibling`.
    const alias_node = ast.nodes[ast.nodes[seq.kind.sequence.?].next_sibling.?];
    try testing.expect(alias_node.kind == .alias);
    const target = try ast.resolveAlias(alias_node);
    try testing.expectEqualStrings("1", ast.nodes[target].kind.number.raw);
}

// A second value after the root is an error, not a silently ignored tail.
test "rejects trailing garbage" {
    try testing.expectError(error.TrailingGarbage, parseAbstract(testing.allocator, "1 2"));
}

// Pins the depth limit on both sides of `Printer.max_depth`.
test "bounds nesting depth" {
    const a = testing.allocator;
    // `n` nested sequences: `[`×n then `]`×n (innermost is an empty `[]`).
    const nest = struct {
        fn build(alloc: std.mem.Allocator, n: usize) ![]u8 {
            const buf = try alloc.alloc(u8, n * 2);
            @memset(buf[0..n], '[');
            @memset(buf[n..], ']');
            return buf;
        }
    }.build;

    // At the limit it parses; one level past it is rejected (not a stack crash).
    const ok = try nest(a, Printer.max_depth);
    defer a.free(ok);
    var ast = try parseAbstract(a, ok);
    ast.deinit();

    const too_deep = try nest(a, Printer.max_depth + 1);
    defer a.free(too_deep);
    try testing.expectError(error.NestingTooDeep, parseAbstract(a, too_deep));
}