fig 1.0.0

Parse, edit, and convert config files while preserving comments. Supports JSON, YAML, TOML, and more.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
//! ZON (Zig Object Notation) parser.
//!
//! Unlike the JSON/YAML parsers, this one hand-rolls NO tokenizer. ZON is a
//! subset of Zig source, so we lean on the Zig standard library's own parser:
//! `std.zig.Ast.parse(.zon)` produces a lossless syntax tree — token spans and
//! raw source bytes preserved — which we walk into fig's AST. This is the one
//! format where the stdlib carries the lexing/parsing for us, precisely because
//! `std.zig.Ast` is a formatter-grade *concrete* syntax tree (it backs `zig
//! fmt`), so it keeps the source fidelity fig's Document/span model needs.
//!
//! Enum and char literals — types the abstract model has no plain variant for —
//! are preserved as `extended` nodes (`ExtKind.enum_literal` / `.char_literal`),
//! so they round-trip through ZON losslessly. A non-ZON printer renders them
//! best-effort: an enum literal as a string (`"foo"`), a char as its codepoint.
//!
//! Remaining best-effort, niche behavior:
//!   * `inf` / `nan` become float numbers with verbatim raw text; a JSON printer
//!     will then emit non-conformant `inf`/`nan` tokens.
//!   * Non-decimal / underscored numbers (`0x1F`, `1_000`) keep their raw text,
//!     which is valid ZON but NOT valid JSON when converted.
//!
//! This parser is coupled to the Zig compiler's internal `std.zig.Ast` API,
//! which shifts between releases — the deliberate trade for the cheap impl.

const Parser = @This();

const std = @import("std");
const testing = std.testing;
const Ast = std.zig.Ast;
const AST = @import("../ast/ast.zig");
const Document = @import("../document.zig");
const Type = @import("zon.zig").Type;
const Span = @import("../util/span.zig");

/// Allocator used for every list, comment slice, and decoded string the parser
/// builds.
allocator: std.mem.Allocator,
/// Borrowed: the original (non-sentinel) input. AST string/number payloads that
/// point into source MUST slice this — never the sentinel dup `Ast` tokenizes,
/// which is freed before `parse` returns. Byte offsets are identical between the
/// two, so spans from the tree index this slice unchanged.
source: []const u8 = "",
/// The `std.zig.Ast` being walked. `parse` points this at a tree local to that
/// call, so it is only meaningful while `parse` is running.
tree: *const Ast = undefined,
/// fig nodes under construction. A node's id is its index in this list.
nodes: std.ArrayList(AST.Node) = .empty,
/// Source span of each node, appended in lockstep with `nodes` by `addNode`.
node_spans: std.ArrayList(Span) = .empty,
/// Heap-allocated node payloads (decoded strings, quoted names, char
/// codepoints). Freed by `deinit` unless `parse` hands them to the AST.
owned_strings: std.ArrayList([]const u8) = .empty,
// Comment layer. `std.zig.Ast` discards `//` comments (its tokenizer treats them
// as whitespace), so they're recovered by byte-scanning the source *gaps*
// between nodes — `absorbCommentsUpTo(pos)` scans `[scan_pos, pos)`, which holds
// only punctuation/whitespace/comments (never a string or char literal, since
// those are values the walk jumps `scan_pos` over). A comment on the same line
// as a just-finished value trails it; one on its own line buffers as leading.
scan_pos: usize = 0,
/// Per-node comment slots, appended in lockstep with `nodes` by `addNode`.
node_comments: std.ArrayList(AST.NodeComments) = .empty,
/// Own-line comments seen but not yet attached; drained by `claimLeading` or
/// `claimDangling`.
pending_leading: std.ArrayList(AST.Comment) = .empty,
/// The value most recently finished on the current line, if any. Cleared when
/// the scan crosses a newline or attaches a comment to it.
last_value_id: ?AST.Node.Id = null,
/// Set once any comment is attached to a node; `parse` only moves
/// `node_comments` into the AST when this is true.
comments_seen: bool = false,

pub const Error = error{ InvalidZon, UnsupportedZon } || std.mem.Allocator.Error;

/// Parse `input` and keep only the abstract tree. The span table that `parse`
/// returns alongside it is released before returning, so the caller owns just
/// the `AST`. Mirrors the other languages' `parseAbstract` so tests can compare
/// abstract shape.
pub fn parseAbstract(allocator: std.mem.Allocator, input: []const u8, format: Type) !AST {
    const document = try parse(allocator, input, format);
    defer allocator.free(document.node_spans);
    return document.ast;
}

/// Parse ZON `input` into a `Document`: the fig AST plus one source span per
/// node. `format` is accepted for signature parity and ignored.
///
/// The returned document borrows `input` (node payloads that need no decoding
/// slice it directly), so `input` must outlive the result. The caller owns the
/// returned AST and `node_spans`.
///
/// Errors: `InvalidZon` when `std.zig.Ast` reports syntax errors or there is no
/// root value, `UnsupportedZon` for constructs the walk rejects, and
/// `OutOfMemory`.
pub fn parse(allocator: std.mem.Allocator, input: []const u8, format: Type) Error!Document {
    _ = format;

    // `std.zig.Ast.parse` requires a null-terminated source. The dup is byte
    // identical to `input`, so all token offsets line up with `input`.
    const sentinel = try allocator.dupeZ(u8, input);
    defer allocator.free(sentinel);

    var tree = try Ast.parse(allocator, sentinel, .zon);
    defer tree.deinit(allocator);
    if (tree.errors.len > 0) return error.InvalidZon;

    const root_decls = tree.rootDecls();
    if (root_decls.len == 0) return error.InvalidZon;

    var self: Parser = .{ .allocator = allocator, .source = input, .tree = &tree };
    errdefer self.deinit();

    // A leading comment block above the whole document binds to the root (a
    // container root passes it through to its first key inside `walk`).
    try self.absorbCommentsUpTo(self.nodeSpan(root_decls[0]).start);
    const root = try self.walk(root_decls[0]);
    try self.claimLeading(root);
    self.last_value_id = root;
    self.scan_pos = self.node_spans.items[root].end;
    try self.absorbCommentsUpTo(input.len); // a trailing comment after the root

    const nodes = try self.nodes.toOwnedSlice(allocator);
    errdefer allocator.free(nodes);
    self.nodes = .empty;

    const node_spans = try self.node_spans.toOwnedSlice(allocator);
    errdefer allocator.free(node_spans);
    self.node_spans = .empty;

    const owned_strings = try self.owned_strings.toOwnedSlice(allocator);
    // The strings have left the parser's list, so `self.deinit()` can no longer
    // reach them. Release them here if the comment hand-off below fails;
    // otherwise both the slice and every string in it would leak.
    errdefer {
        for (owned_strings) |s| allocator.free(s);
        allocator.free(owned_strings);
    }
    self.owned_strings = .empty;

    var result: AST = .{
        .allocator = allocator,
        .owned_strings = owned_strings,
        .root = root,
        .nodes = nodes,
    };
    if (self.comments_seen) {
        result.node_comments = try self.node_comments.toOwnedSlice(allocator);
        self.node_comments = .empty;
    }

    // Success path: `errdefer self.deinit()` won't run, so free the comment
    // scratch lists here (the owned `leading` slices, if any, moved into the AST
    // above and left `node_comments` empty).
    for (self.node_comments.items) |nc| allocator.free(nc.leading);
    self.node_comments.deinit(allocator);
    self.pending_leading.deinit(allocator);

    return .{
        .source = input,
        .ast = result,
        .node_spans = node_spans,
    };
}

/// Release everything the parser still owns. `parse` runs this on its error
/// paths; after a successful parse the lists have been handed to the AST and are
/// empty, so this frees nothing of substance.
pub fn deinit(self: *Parser) void {
    self.nodes.deinit(self.allocator);
    self.node_spans.deinit(self.allocator);
    for (self.owned_strings.items) |s| self.allocator.free(s);
    self.owned_strings.deinit(self.allocator);
    // Both comment runs are heap slices (`claimLeading` fills `leading`;
    // `claimDangling` / `appendDangling` fill `dangling`), so both must be
    // released when a parse is abandoned. Freeing a slot that was never filled
    // is an empty-slice free, which is a no-op. Comment text borrows `source`.
    for (self.node_comments.items) |nc| {
        self.allocator.free(nc.leading);
        self.allocator.free(nc.dangling);
    }
    self.node_comments.deinit(self.allocator);
    self.pending_leading.deinit(self.allocator);
}

// =========
// THE WALK
// =========

/// Convert one `std.zig.Ast` node into a fig node by dispatching on its tag.
/// Any tag not listed here is rejected with `UnsupportedZon`.
fn walk(self: *Parser, node: Ast.Node.Index) Error!AST.Node.Id {
    return switch (self.tree.nodeTag(node)) {
        // Scalars.
        .number_literal => self.number(node),
        .char_literal => self.charLiteral(node),
        .identifier => self.identifier(node),
        .enum_literal => self.enumLiteral(node),
        .string_literal, .multiline_string_literal => self.stringLiteral(node),
        .negation => self.negation(node),

        // Parenthesized grouping is not legal ZON, yet the parser still emits
        // the node; look straight through it (best-effort).
        .grouped_expression => self.walk(self.tree.nodeData(node).node_and_token[0]),

        // Every array/struct init shape funnels into `container`.
        .array_init_one,
        .array_init_one_comma,
        .array_init_dot_two,
        .array_init_dot_two_comma,
        .array_init_dot,
        .array_init_dot_comma,
        .array_init,
        .array_init_comma,
        .struct_init_one,
        .struct_init_one_comma,
        .struct_init_dot_two,
        .struct_init_dot_two_comma,
        .struct_init_dot,
        .struct_init_dot_comma,
        .struct_init,
        .struct_init_comma,
        => self.container(node),

        else => error.UnsupportedZon,
    };
}

/// Route an init node to `sequence` (array literal) or `mapping` (struct
/// literal). An empty `.{}` is reported by the stdlib as an empty struct init,
/// so it becomes an empty mapping. Typed inits (`T{...}`) are rejected with
/// `UnsupportedZon`.
fn container(self: *Parser, node: Ast.Node.Index) Error!AST.Node.Id {
    // Scratch storage the `full*Init` helpers take for the small init forms.
    var scratch: [2]Ast.Node.Index = undefined;

    if (self.tree.fullArrayInit(&scratch, node)) |array_init| {
        if (array_init.ast.type_expr == .none) {
            return self.sequence(node, array_init.ast.elements);
        }
        return error.UnsupportedZon;
    }

    if (self.tree.fullStructInit(&scratch, node)) |struct_init| {
        if (struct_init.ast.type_expr == .none) {
            return self.mapping(node, struct_init.ast.fields);
        }
        return error.UnsupportedZon;
    }

    return error.UnsupportedZon;
}

/// Build a sequence node from an array literal. The sequence node is created
/// first (so it gets the lowest id), its elements are walked in order and
/// chained through `next_sibling`, and comments found in the gaps are attached
/// as each element is visited.
fn sequence(self: *Parser, node: Ast.Node.Index, elements: []const Ast.Node.Index) Error!AST.Node.Id {
    const tree = self.tree;
    const whole = self.nodeSpan(node);

    const seq_id = try self.addNode(.{ .sequence = null }, whole);
    try self.captureOpenTrailing(seq_id, node);

    var head: ?AST.Node.Id = null;
    var tail: ?AST.Node.Id = null;
    for (elements) |element| {
        const element_start = tree.tokenStart(tree.firstToken(element));
        try self.absorbCommentsUpTo(element_start);

        const item = try self.walk(element);
        // Scalars take the buffered leading comments here. A container already
        // handed them to its first key while being walked, so nothing is left
        // to claim in that case.
        try self.claimLeading(item);
        self.last_value_id = item;
        self.scan_pos = self.node_spans.items[item].end;

        if (tail) |previous| {
            self.nodes.items[previous].next_sibling = item;
        } else {
            head = item;
        }
        tail = item;
    }

    // Pick up a trailing comment on the last element, then any own-line
    // orphans sitting before the closing brace.
    try self.absorbCommentsUpTo(whole.end);
    try self.claimDangling(seq_id);

    self.nodes.items[seq_id].kind = .{ .sequence = head };
    return seq_id;
}

/// Build a mapping node from a struct literal. Each field yields a key node, a
/// value node, and a keyvalue node spanning both; the keyvalue nodes are
/// chained through `next_sibling` in source order.
fn mapping(self: *Parser, node: Ast.Node.Index, fields: []const Ast.Node.Index) Error!AST.Node.Id {
    const tree = self.tree;
    const whole = self.nodeSpan(node);

    const map_id = try self.addNode(.{ .mapping = null }, whole);
    try self.captureOpenTrailing(map_id, node);

    var head: ?AST.Node.Id = null;
    var tail: ?AST.Node.Id = null;
    for (fields) |value_node| {
        // A field reads `.name = value` and the field node is the value, so two
        // tokens before the value's first token (skipping `=`) is `name`.
        const name_token = tree.firstToken(value_node) - 2;

        // Comments above the entry belong to its key.
        try self.absorbCommentsUpTo(tree.tokenStart(name_token));
        const key_id = try self.fieldName(name_token);
        try self.claimLeading(key_id);
        // Resume scanning after the key so the name / `=` are not re-scanned
        // while the value is walked.
        self.scan_pos = self.node_spans.items[key_id].end;

        const value_id = try self.walk(value_node);
        self.last_value_id = value_id;
        const key_start = self.node_spans.items[key_id].start;
        const value_end = self.node_spans.items[value_id].end;
        self.scan_pos = value_end;

        const entry_id = try self.addNode(
            .{ .keyvalue = .{ .key = key_id, .value = value_id } },
            .{ .start = key_start, .end = value_end },
        );
        if (tail) |previous| {
            self.nodes.items[previous].next_sibling = entry_id;
        } else {
            head = entry_id;
        }
        tail = entry_id;
    }

    // Pick up a trailing comment on the last entry, then any own-line orphans
    // sitting before the closing brace.
    try self.absorbCommentsUpTo(whole.end);
    try self.claimDangling(map_id);

    self.nodes.items[map_id].kind = .{ .mapping = head };
    return map_id;
}

// =========
// SCALARS
// =========

/// Build a number node whose raw text is the literal exactly as written in the
/// source; the integer/float hint comes from `classifyNumber`.
fn number(self: *Parser, node: Ast.Node.Index) Error!AST.Node.Id {
    const where = self.nodeSpan(node);
    const text = self.sourceSlice(where);
    const hint = classifyNumber(text);
    return self.addNode(.{ .number = .{ .raw = text, .kind = hint } }, where);
}

/// Handle a unary `-`. Only a negated number literal or `-inf` is accepted;
/// any other operand is rejected with `UnsupportedZon`.
fn negation(self: *Parser, node: Ast.Node.Index) Error!AST.Node.Id {
    const tree = self.tree;
    const operand = tree.nodeData(node).node;
    const whole = self.nodeSpan(node); // from the `-` through the operand

    const operand_tag = tree.nodeTag(operand);
    if (operand_tag == .number_literal) {
        // `-123`, `-0x1.5p3`: keep the source text, sign included, verbatim.
        const text = self.sourceSlice(whole);
        return self.addNode(.{ .number = .{ .raw = text, .kind = classifyNumber(text) } }, whole);
    }
    if (operand_tag == .identifier) {
        // The only identifier that may be negated is `inf`.
        const name = tree.tokenSlice(tree.nodeMainToken(operand));
        if (!std.mem.eql(u8, name, "inf")) return error.UnsupportedZon;
        return self.addNode(.{ .number = .{ .raw = "-inf", .kind = .float } }, whole);
    }
    return error.UnsupportedZon;
}

/// Map a bare identifier to its fig node: `true` / `false` become booleans,
/// `null` becomes the null node, and `inf` / `nan` become float numbers. Every
/// other identifier is rejected with `UnsupportedZon`.
fn identifier(self: *Parser, node: Ast.Node.Index) Error!AST.Node.Id {
    const name = self.tree.tokenSlice(self.tree.nodeMainToken(node));

    const kind: AST.Node.Kind = if (std.mem.eql(u8, name, "true"))
        .{ .boolean = true }
    else if (std.mem.eql(u8, name, "false"))
        .{ .boolean = false }
    else if (std.mem.eql(u8, name, "null"))
        .null_
    else if (std.mem.eql(u8, name, "inf"))
        .{ .number = .{ .raw = "inf", .kind = .float } }
    else if (std.mem.eql(u8, name, "nan"))
        .{ .number = .{ .raw = "nan", .kind = .float } }
    else
        return error.UnsupportedZon; // no other bare identifier is accepted

    return self.addNode(kind, self.nodeSpan(node));
}

/// `.foo` enum literal → `extended` enum_literal whose text is the bare name (no
/// leading `.`). The name is a valid identifier in the common case (slice
/// straight from source); the rare `.@"quoted"` form is decoded into an owned
/// string.
///
/// Errors: `InvalidZon` if a quoted name fails to decode, `OutOfMemory`.
fn enumLiteral(self: *Parser, node: Ast.Node.Index) Error!AST.Node.Id {
    const tree = self.tree;
    const token = tree.nodeMainToken(node);
    const span = self.tokenSpan(token);
    const slice = tree.tokenSlice(token);
    if (std.mem.startsWith(u8, slice, "@\"")) {
        // Drop the `@` so the remainder is an ordinary quoted string literal.
        const decoded = std.zig.string_literal.parseAlloc(self.allocator, slice[1..]) catch |e| switch (e) {
            error.OutOfMemory => return error.OutOfMemory,
            error.InvalidLiteral => return error.InvalidZon,
        };
        {
            // The cleanup is scoped to the hand-off only. Once the append
            // succeeds `owned_strings` owns `decoded`, so a later `addNode`
            // failure must not free it again — the parser's teardown will.
            errdefer self.allocator.free(decoded);
            try self.owned_strings.append(self.allocator, decoded);
        }
        return self.addNode(.{ .extended = .{ .kind = .enum_literal, .text = decoded } }, span);
    }
    return self.addNode(.{ .extended = .{ .kind = .enum_literal, .text = self.sourceSlice(span) } }, span);
}

/// Build a string node from a quoted or multiline string literal. The payload
/// is the decoded text (escapes resolved, multiline `\\` lines joined), held in
/// `owned_strings`.
///
/// Errors: `InvalidZon` if the stdlib reports a decode failure, `OutOfMemory`.
fn stringLiteral(self: *Parser, node: Ast.Node.Index) Error!AST.Node.Id {
    const span = self.nodeSpan(node);

    // Let stdlib decode escapes and join multiline `\\` lines for us.
    var aw: std.Io.Writer.Allocating = .init(self.allocator);
    errdefer aw.deinit();
    // A write failure from the allocating writer is reported as `OutOfMemory`.
    const result = std.zig.ZonGen.parseStrLit(self.tree.*, node, &aw.writer) catch
        return error.OutOfMemory;
    switch (result) {
        .success => {},
        // `Ast.parse` already validated syntax, so a decode failure here is not
        // expected; treat defensively as malformed input.
        .failure => return error.InvalidZon,
    }

    const owned = try aw.toOwnedSlice();
    {
        // The cleanup is scoped to the hand-off only. Once the append succeeds
        // `owned_strings` owns `owned`, so a later `addNode` failure must not
        // free it again — the parser's teardown will.
        errdefer self.allocator.free(owned);
        try self.owned_strings.append(self.allocator, owned);
    }
    return self.addNode(.{ .string = owned }, span);
}

/// `'a'` char literal → `extended` char_literal whose text is the decimal
/// Unicode codepoint. Storing the codepoint (not the raw `'a'`) keeps the
/// Zig-specific char codec confined to this parser and the ZON printer: other
/// formats treat the text as a plain number. The ZON printer re-encodes `'a'`.
///
/// Errors: `InvalidZon` if the literal fails to decode, `OutOfMemory`.
fn charLiteral(self: *Parser, node: Ast.Node.Index) Error!AST.Node.Id {
    const span = self.nodeSpan(node);
    const slice = self.tree.tokenSlice(self.tree.nodeMainToken(node));
    const codepoint: u21 = switch (std.zig.string_literal.parseCharLiteral(slice)) {
        .success => |c| c,
        .failure => return error.InvalidZon,
    };
    const raw = try std.fmt.allocPrint(self.allocator, "{d}", .{codepoint});
    {
        // The cleanup is scoped to the hand-off only. Once the append succeeds
        // `owned_strings` owns `raw`, so a later `addNode` failure must not
        // free it again — the parser's teardown will.
        errdefer self.allocator.free(raw);
        try self.owned_strings.append(self.allocator, raw);
    }
    return self.addNode(.{ .extended = .{ .kind = .char_literal, .text = raw } }, span);
}

/// Build a string key node from a struct-field name token. A plain identifier
/// is sliced straight from source; the `@"quoted"` form is decoded into an
/// owned string.
///
/// Errors: `InvalidZon` if a quoted name fails to decode, `OutOfMemory`.
fn fieldName(self: *Parser, token: Ast.TokenIndex) Error!AST.Node.Id {
    const slice = self.tree.tokenSlice(token);
    const span = self.tokenSpan(token);
    if (std.mem.startsWith(u8, slice, "@\"")) {
        // Drop the `@` so the remainder is an ordinary quoted string literal.
        const decoded = std.zig.string_literal.parseAlloc(self.allocator, slice[1..]) catch |e| switch (e) {
            error.OutOfMemory => return error.OutOfMemory,
            error.InvalidLiteral => return error.InvalidZon,
        };
        {
            // The cleanup is scoped to the hand-off only. Once the append
            // succeeds `owned_strings` owns `decoded`, so a later `addNode`
            // failure must not free it again — the parser's teardown will.
            errdefer self.allocator.free(decoded);
            try self.owned_strings.append(self.allocator, decoded);
        }
        return self.addNode(.{ .string = decoded }, span);
    }
    return self.addNode(.{ .string = self.sourceSlice(span) }, span);
}

// =========
// HELPERS
// =========

/// Append a node along with its span and an empty comment slot, and return the
/// new node's id (its index in `nodes`). The three lists grow together so the
/// same id addresses all of them.
fn addNode(self: *Parser, kind: AST.Node.Kind, span: Span) Error!AST.Node.Id {
    const gpa = self.allocator;
    const next_id: AST.Node.Id = @intCast(self.nodes.items.len);
    const fresh: AST.Node = .{ .id = next_id, .kind = kind, .next_sibling = null };

    try self.nodes.append(gpa, fresh);
    try self.node_spans.append(gpa, span);
    try self.node_comments.append(gpa, .{});
    return next_id;
}

// ── Comments ─────────────────────────────────────────────────────────────────

/// Walk the source gap `[scan_pos, pos)` looking for `//` line comments and
/// file each one: if a value finished earlier on the same line
/// (`last_value_id` is set and no newline has been crossed) the comment is
/// attached to that value, otherwise it is buffered as a leading comment. Only
/// newlines and `//` matter; every other byte in the gap is stepped over.
/// `scan_pos` is advanced to where the scan stopped.
fn absorbCommentsUpTo(self: *Parser, pos: usize) Error!void {
    const src = self.source;
    var cursor = self.scan_pos;

    while (cursor < pos) {
        if (src[cursor] == '\n') {
            // A new line: nothing seen from here on can trail the old value.
            self.last_value_id = null;
            cursor += 1;
            continue;
        }
        if (!std.mem.startsWith(u8, src[cursor..], "//")) {
            cursor += 1;
            continue;
        }

        // The comment runs to the end of its line (or the end of the source).
        const text_start = cursor + 2;
        const line_end = std.mem.indexOfScalarPos(u8, src, text_start, '\n') orelse src.len;
        const comment: AST.Comment = .{
            .text = std.mem.trim(u8, src[text_start..line_end], " \t\r"),
            .style = .line,
        };

        if (self.last_value_id) |owner| {
            // On the closing line of a multi-line container the comment is a
            // bottom comment and joins its `dangling` run; a scalar or an
            // inline container takes it as its same-line `trailing`.
            if (self.multilineContainer(owner, cursor)) {
                try self.appendDangling(owner, comment);
            } else {
                self.node_comments.items[owner].trailing = comment;
                self.comments_seen = true;
            }
            self.last_value_id = null;
        } else {
            try self.pending_leading.append(self.allocator, comment);
        }
        cursor = line_end;
    }

    self.scan_pos = @max(self.scan_pos, cursor);
}

/// Give node `id` a heap copy of the buffered leading comments and empty the
/// buffer. Does nothing when the buffer is empty.
fn claimLeading(self: *Parser, id: AST.Node.Id) Error!void {
    const buffered = self.pending_leading.items;
    if (buffered.len == 0) return;

    const copy = try self.allocator.dupe(AST.Comment, buffered);
    self.node_comments.items[id].leading = copy;
    self.pending_leading.clearRetainingCapacity();
    self.comments_seen = true;
}

/// If a container's opening `.{` is followed on the same line by nothing but
/// spaces/tabs and then a `//` comment (`.{ // c`), record that comment as
/// container `id`'s own trailing (its head comment) and move `scan_pos` past it
/// so later absorbs skip it. Anything else on the open line — a field, a lone
/// `/`, or the end of the line — leaves the container untouched.
fn captureOpenTrailing(self: *Parser, id: AST.Node.Id, node: Ast.Node.Index) Error!void {
    const src = self.source;

    // First `{` at or after the node's start; past-the-end if there is none.
    const node_start: usize = self.nodeSpan(node).start;
    const brace = std.mem.indexOfScalarPos(u8, src, node_start, '{') orelse src.len;

    // Step over horizontal whitespace after the brace.
    var at = brace + 1;
    while (at < src.len and (src[at] == ' ' or src[at] == '\t' or src[at] == '\r')) at += 1;
    if (at >= src.len) return;

    // The first significant byte must open a `//` comment.
    if (!std.mem.startsWith(u8, src[at..], "//")) return;

    const text_start = at + 2;
    const line_end = std.mem.indexOfScalarPos(u8, src, text_start, '\n') orelse src.len;
    self.node_comments.items[id].trailing = .{
        .text = std.mem.trim(u8, src[text_start..line_end], " \t\r"),
        .style = .line,
    };
    self.comments_seen = true;
    if (line_end > self.scan_pos) self.scan_pos = line_end; // skip it in later absorbs
}

/// True when `id` is a sequence or mapping whose span starts before `cpos` with
/// at least one newline in between — that is, a container opened on an earlier
/// line than `cpos`.
fn multilineContainer(self: *Parser, id: AST.Node.Id, cpos: usize) bool {
    const is_container = switch (self.nodes.items[id].kind) {
        .sequence, .mapping => true,
        else => false,
    };
    if (!is_container) return false;

    const open = self.node_spans.items[id].start;
    // `and` short-circuits, so the slice is only taken when `open < cpos`.
    return cpos > open and std.mem.indexOfScalar(u8, self.source[open..cpos], '\n') != null;
}

/// Add comment `c` to the end of `id`'s `dangling` run. The run is rebuilt in a
/// fresh allocation one slot larger and the previous one is released, so
/// orphans already claimed at the container's close are kept.
fn appendDangling(self: *Parser, id: AST.Node.Id, c: AST.Comment) Error!void {
    const slot = &self.node_comments.items[id];
    const previous = slot.dangling;

    const extended = try self.allocator.alloc(AST.Comment, previous.len + 1);
    @memcpy(extended[0..previous.len], previous);
    extended[previous.len] = c;

    self.allocator.free(previous);
    slot.dangling = extended;
    self.comments_seen = true;
}

/// Give container `id` a heap copy of the buffered orphan comments (own-line
/// comments at the end of its body, before the closing brace) as its `dangling`
/// run, and empty the buffer. Does nothing when the buffer is empty.
fn claimDangling(self: *Parser, id: AST.Node.Id) Error!void {
    const orphans = self.pending_leading.items;
    if (orphans.len == 0) return;

    const copy = try self.allocator.dupe(AST.Comment, orphans);
    self.node_comments.items[id].dangling = copy;
    self.pending_leading.clearRetainingCapacity();
    self.comments_seen = true;
}

/// Byte span of a syntax node: from the start of its first token to the end of
/// its last token.
fn nodeSpan(self: *Parser, node: Ast.Node.Index) Span {
    const tree = self.tree;
    const first_token = tree.firstToken(node);
    const last_token = tree.lastToken(node);
    const last_text = tree.tokenSlice(last_token);
    return .{
        .start = tree.tokenStart(first_token),
        .end = tree.tokenStart(last_token) + last_text.len,
    };
}

/// Byte span covered by a single token.
fn tokenSpan(self: *Parser, token: Ast.TokenIndex) Span {
    const tree = self.tree;
    const begin = tree.tokenStart(token);
    const text = tree.tokenSlice(token);
    return .{ .start = begin, .end = begin + text.len };
}

/// The bytes of the borrowed `source` that `span` covers.
fn sourceSlice(self: *Parser, span: Span) []const u8 {
    const text = self.source;
    return text[span.start..span.end];
}

/// Derive fig's `Number.kind` hint from the verbatim ZON literal (the raw text
/// stays authoritative). A literal is a float if it has a `.`, or an exponent
/// marker: `p`/`P` for hex literals, `e`/`E` otherwise. Everything else —
/// including `0x`/`0o`/`0b` integers and `_`-separated digits — is an integer.
fn classifyNumber(raw: []const u8) @FieldType(AST.Node.Kind.Number, "kind") {
    if (std.mem.indexOfScalar(u8, raw, '.') != null) return .float;

    const unsigned = if (raw.len > 0 and raw[0] == '-') raw[1..] else raw;
    const is_hex = std.mem.startsWith(u8, unsigned, "0x") or std.mem.startsWith(u8, unsigned, "0X");

    // `e`/`E` are ordinary digits in hex, so only `p`/`P` marks an exponent
    // there.
    const exponent_markers: []const u8 = if (is_hex) "pP" else "eE";
    const has_exponent = std.mem.indexOfAny(u8, unsigned, exponent_markers) != null;
    return if (has_exponent) .float else .integer;
}

// =========
// TESTS
// =========

/// Test helper: parse `input` as ZON and require the abstract tree to equal
/// `expected`.
fn expectParse(input: []const u8, expected: AST) !void {
    var actual = try parseAbstract(testing.allocator, input, .ZON);
    defer actual.deinit();
    const same = expected.eql(actual);
    try testing.expect(same);
}

test "scalar literals" {
    // Each scalar parses to a single-node tree whose root is node 0.
    const gpa = testing.allocator;

    try expectParse("true", .{
        .allocator = gpa,
        .root = 0,
        .nodes = &.{.{ .id = 0, .kind = .{ .boolean = true } }},
    });
    try expectParse("null", .{
        .allocator = gpa,
        .root = 0,
        .nodes = &.{.{ .id = 0, .kind = .null_ }},
    });
    try expectParse("42", .{
        .allocator = gpa,
        .root = 0,
        .nodes = &.{.{ .id = 0, .kind = .{ .number = .{ .raw = "42", .kind = .integer } } }},
    });
    try expectParse("-3.5", .{
        .allocator = gpa,
        .root = 0,
        .nodes = &.{.{ .id = 0, .kind = .{ .number = .{ .raw = "-3.5", .kind = .float } } }},
    });
}

test "struct literal becomes a mapping" {
    // Ids follow creation order: the mapping itself, then for each field its
    // key, its value, and the keyvalue node tying the two together.
    const source =
        \\.{ .name = "Ada", .age = 36 }
    ;
    const expected: AST = .{ .allocator = testing.allocator, .root = 0, .nodes = &.{
        .{ .id = 0, .kind = .{ .mapping = 3 } },
        .{ .id = 1, .kind = .{ .string = "name" } },
        .{ .id = 2, .kind = .{ .string = "Ada" } },
        .{ .id = 3, .kind = .{ .keyvalue = .{ .key = 1, .value = 2 } }, .next_sibling = 6 },
        .{ .id = 4, .kind = .{ .string = "age" } },
        .{ .id = 5, .kind = .{ .number = .{ .raw = "36", .kind = .integer } } },
        .{ .id = 6, .kind = .{ .keyvalue = .{ .key = 4, .value = 5 } } },
    } };
    try expectParse(source, expected);
}

test "array literal becomes a sequence" {
    // The sequence is node 0; its elements follow and are chained as siblings.
    const expected: AST = .{ .allocator = testing.allocator, .root = 0, .nodes = &.{
        .{ .id = 0, .kind = .{ .sequence = 1 } },
        .{ .id = 1, .kind = .{ .number = .{ .raw = "1", .kind = .integer } }, .next_sibling = 2 },
        .{ .id = 2, .kind = .{ .number = .{ .raw = "2", .kind = .integer } }, .next_sibling = 3 },
        .{ .id = 3, .kind = .{ .number = .{ .raw = "3", .kind = .integer } } },
    } };
    try expectParse(".{ 1, 2, 3 }", expected);
}

test "empty .{} is an empty mapping" {
    // An empty init is treated as a struct literal, hence a childless mapping.
    const expected: AST = .{
        .allocator = testing.allocator,
        .root = 0,
        .nodes = &.{.{ .id = 0, .kind = .{ .mapping = null } }},
    };
    try expectParse(".{}", expected);
}

test "string escapes are decoded" {
    // The ZON source is a quoted literal with `\t` and `\n` still spelled out;
    // the string node must carry the decoded bytes.
    const source = "\"tab:\\tnl:\\n\"";
    var parsed = try parseAbstract(testing.allocator, source, .ZON);
    defer parsed.deinit();
    const root = parsed.nodes[parsed.root];
    try testing.expectEqualStrings("tab:\tnl:\n", root.kind.string);
}

test "multiline string literal is joined" {
    // Two `\\` lines must come back as one string joined by a newline.
    const source =
        \\.{
        \\    .body =
        \\        \\line one
        \\        \\line two
        \\    ,
        \\}
    ;
    var parsed = try parseAbstract(testing.allocator, source, .ZON);
    defer parsed.deinit();
    const body = try parsed.getValByPath(&.{.{ .key = "body" }});
    try testing.expectEqualStrings("line one\nline two", body.kind.string);
}

test "enum literal preserved as extended" {
    // `.fast` keeps its enum-literal identity; the text drops the leading dot.
    var parsed = try parseAbstract(testing.allocator, ".{ .mode = .fast }", .ZON);
    defer parsed.deinit();
    const mode = try parsed.getValByPath(&.{.{ .key = "mode" }});
    const ext = mode.kind.extended;
    try testing.expect(ext.kind == .enum_literal);
    try testing.expectEqualStrings("fast", ext.text);
}

test "char literal preserved as extended codepoint" {
    // 'A' is stored as its decimal codepoint, 65.
    var parsed = try parseAbstract(testing.allocator, ".{ .c = 'A' }", .ZON);
    defer parsed.deinit();
    const char_node = try parsed.getValByPath(&.{.{ .key = "c" }});
    const ext = char_node.kind.extended;
    try testing.expect(ext.kind == .char_literal);
    try testing.expectEqualStrings("65", ext.text);
}

test "nested containers" {
    // A mapping holding a sequence of mappings: reach items[1].id.
    const source =
        \\.{ .items = .{ .{ .id = 1 }, .{ .id = 2 } } }
    ;
    var parsed = try parseAbstract(testing.allocator, source, .ZON);
    defer parsed.deinit();
    const second_id = try parsed.getValByPath(&.{ .{ .key = "items" }, .{ .index = 1 }, .{ .key = "id" } });
    try testing.expectEqualStrings("2", second_id.kind.number.raw);
}

test "hex and underscored numbers keep raw text" {
    const source = ".{ .a = 0xFF, .b = 1_000, .c = 0x1.5p3 }";
    var parsed = try parseAbstract(testing.allocator, source, .ZON);
    defer parsed.deinit();

    // Hex integer: raw text kept, classified as an integer.
    const a = try parsed.getValByPath(&.{.{ .key = "a" }});
    try testing.expectEqualStrings("0xFF", a.kind.number.raw);
    try testing.expect(a.kind.number.kind == .integer);

    // Underscore-separated digits: raw text kept.
    const b = try parsed.getValByPath(&.{.{ .key = "b" }});
    try testing.expectEqualStrings("1_000", b.kind.number.raw);

    // Hex float: classified as a float.
    const c = try parsed.getValByPath(&.{.{ .key = "c" }});
    try testing.expect(c.kind.number.kind == .float);
}

test "syntax errors are rejected" {
    // A field with no value, and an empty document, must both be refused.
    const malformed = [_][]const u8{ ".{ .a = }", "" };
    for (malformed) |input| {
        try testing.expectError(error.InvalidZon, parseAbstract(testing.allocator, input, .ZON));
    }
}