// fig 1.0.0 - Docs.rs

//! TOML conformance scoreboard against toml-lang/toml-test
//! (https://github.com/toml-lang/toml-test).
//!
//! A ratcheting scoreboard like the YAML harness: each run prints a tally and
//! asserts the score has not dropped below a recorded baseline.
//!
//!   invalid/ : parse-only — fig must reject every file.
//!   valid/   : tagged-JSON comparison — each .toml has a sibling .json that
//!              pins every scalar's type + value (toml-test's own format). fig
//!              parses the .toml, the JSON parser parses the .json, and the two
//!              trees are compared structurally (tables unordered, arrays
//!              ordered, leaves by type + normalized value). This catches silent
//!              mis-parses a pass/fail check can't see.
//!
//! Fixtures are vendored by tools/gen_toml_conformance.zig.
//! Run with: zig build test -Dtoml-conformance=true

const std = @import("std");
const testing = std.testing;

const AST = @import("../ast/ast.zig");
const Parser = @import("parser.zig");
const TomlType = @import("toml.zig").Type;
const JsonParser = @import("../json/parser.zig");
const JsonType = @import("../json/json.zig").Type;

/// Upper bound, in bytes, on any single fixture file read by this harness; it
/// is passed as the `.limited(...)` cap to every `readFileAlloc` call below.
const max_fixture_size = 1024 * 1024;

// Baseline scores, one per fixture directory. A ratchet: raise as coverage
// improves; never lower without a deliberate reason. The scoreboard test
// fails when a run's `correct` count falls below its baseline.
// The recorded values are full conformance for both versions: every valid
// file matches its expected typed JSON exactly; every invalid file is rejected.
const valid_1_0_baseline = 209;
const invalid_1_0_baseline = 495;
const valid_1_1_baseline = 218;
const invalid_1_1_baseline = 488;

/// Tally for one fixture directory: `correct` fixtures out of `total` scored.
const Score = struct { correct: usize = 0, total: usize = 0 };

test "toml conformance: scoreboard" {
    // Score the four fixture directories in a fixed order: valid then invalid,
    // first for TOML 1.0, then for TOML 1.1.
    const valid_1_0 = try scoreValidDir("testdata/toml/valid", .TOML_1_0);
    const invalid_1_0 = try scoreInvalidDir("testdata/toml/invalid", .TOML_1_0);
    const valid_1_1 = try scoreValidDir("testdata/toml-1.1.0/valid", .TOML_1_1);
    const invalid_1_1 = try scoreInvalidDir("testdata/toml-1.1.0/invalid", .TOML_1_1);

    // Always print the tally, so the numbers are visible even on a passing run.
    std.debug.print(
        \\
        \\TOML conformance (toml-test)
        \\  1.0.0  valid: {d}/{d} (baseline {d})   invalid: {d}/{d} (baseline {d})
        \\  1.1.0  valid: {d}/{d} (baseline {d})   invalid: {d}/{d} (baseline {d})
        \\
    , .{
        valid_1_0.correct,
        valid_1_0.total,
        valid_1_0_baseline,
        invalid_1_0.correct,
        invalid_1_0.total,
        invalid_1_0_baseline,
        valid_1_1.correct,
        valid_1_1.total,
        valid_1_1_baseline,
        invalid_1_1.correct,
        invalid_1_1.total,
        invalid_1_1_baseline,
    });

    // The ratchet: each score must be at least its recorded baseline.
    const ratchets = [_]struct { got: usize, floor: usize }{
        .{ .got = valid_1_0.correct, .floor = valid_1_0_baseline },
        .{ .got = invalid_1_0.correct, .floor = invalid_1_0_baseline },
        .{ .got = valid_1_1.correct, .floor = valid_1_1_baseline },
        .{ .got = invalid_1_1.correct, .floor = invalid_1_1_baseline },
    };
    for (ratchets) |ratchet| try testing.expect(ratchet.got >= ratchet.floor);
}

/// Score the `invalid/` fixtures in `dir_path`: every regular file ending in
/// `.toml` is fed to the parser for `version` and is expected to be rejected.
///
/// Returns how many files were rejected (`correct`) out of how many were tried
/// (`total`). Errors opening or iterating the directory and reading a fixture
/// propagate. `error.OutOfMemory` from the parser propagates too: an
/// allocation failure says nothing about the input, so it must not be scored
/// as a successful rejection.
fn scoreInvalidDir(dir_path: []const u8, version: TomlType) !Score {
    var threaded = std.Io.Threaded.init(testing.allocator, .{});
    defer threaded.deinit();
    const io = threaded.io();

    var dir = try std.Io.Dir.cwd().openDir(io, dir_path, .{ .iterate = true });
    defer dir.close(io);

    var score: Score = .{};
    var it = dir.iterate();
    while (try it.next(io)) |entry| {
        if (entry.kind != .file or !std.mem.endsWith(u8, entry.name, ".toml")) continue;
        const input = try dir.readFileAlloc(io, entry.name, testing.allocator, .limited(max_fixture_size));
        defer testing.allocator.free(input);

        score.total += 1;
        if (Parser.parse(testing.allocator, input, version)) |doc| {
            // The parser accepted an invalid document: a miss. Release the tree.
            var d = doc;
            d.deinit(testing.allocator);
        } else |err| switch (err) {
            // Resource exhaustion is a harness failure, not a parse verdict.
            error.OutOfMemory => return err,
            else => score.correct += 1,
        }
    }
    return score;
}

/// Score the `valid/` fixtures in `dir_path`. For each regular `.toml` file
/// that has a readable sibling `.json`, parse both and compare the trees with
/// `matchValue`.
///
/// A fixture whose `.json` cannot be read is skipped without being counted. A
/// fixture whose TOML or JSON fails to parse is counted in `total` but not in
/// `correct`. Directory errors and failures reading the `.toml` propagate.
fn scoreValidDir(dir_path: []const u8, version: TomlType) !Score {
    const gpa = testing.allocator;

    var threaded = std.Io.Threaded.init(gpa, .{});
    defer threaded.deinit();
    const io = threaded.io();

    var fixtures = try std.Io.Dir.cwd().openDir(io, dir_path, .{ .iterate = true });
    defer fixtures.close(io);

    var tally: Score = .{};
    var walker = fixtures.iterate();
    while (try walker.next(io)) |entry| {
        if (entry.kind != .file) continue;
        if (!std.mem.endsWith(u8, entry.name, ".toml")) continue;

        const toml_text = try fixtures.readFileAlloc(io, entry.name, gpa, .limited(max_fixture_size));
        defer gpa.free(toml_text);

        // `foo.toml` → `foo.json`: the expected tagged-JSON tree.
        const stem = entry.name[0 .. entry.name.len - ".toml".len];
        const expected_name = try std.fmt.allocPrint(gpa, "{s}.json", .{stem});
        defer gpa.free(expected_name);
        const expected_text = fixtures.readFileAlloc(io, expected_name, gpa, .limited(max_fixture_size)) catch continue;
        defer gpa.free(expected_text);

        tally.total += 1;

        var actual = Parser.parse(gpa, toml_text, version) catch continue;
        defer actual.deinit(gpa);
        var expected = JsonParser.parse(gpa, expected_text, JsonType.JSON) catch continue;
        defer expected.deinit(gpa);

        const same = matchValue(&actual.ast, actual.ast.root, &expected.ast, expected.ast.root);
        if (same) tally.correct += 1;
    }
    return tally;
}

// ── tagged-JSON comparison ──────────────────────────────────────────────────

/// Type names accepted in a tagged leaf's `"type"` field. `asLeaf` maps the
/// JSON string to a variant with `std.meta.stringToEnum`, so each variant is
/// spelled exactly as it appears in the expected JSON (hence the quoted
/// identifiers for the hyphenated names).
const TagType = enum {
    string,
    integer,
    float,
    bool,
    datetime,
    @"datetime-local",
    @"date-local",
    @"time-local",
};

/// A decoded tagged leaf: the expected scalar type and its value as the string
/// taken from the JSON `"value"` field.
const Leaf = struct { tag: TagType, value: []const u8 };

/// Decode `j_id` as a toml-test tagged leaf, or return null when it is not one.
///
/// A tagged leaf is a JSON object with exactly two members, `"type"` and
/// `"value"`, both plain strings, where the type names a `TagType` variant.
/// Anything else is a real table or array. A genuine TOML table that happens
/// to have keys "type"/"value" holds *tagged objects* as its values, so its
/// members are not bare strings; that is the disambiguation toml-test decoders
/// rely on.
fn asLeaf(ja: *const AST, j_id: AST.Node.Id) ?Leaf {
    var member = switch (ja.nodes[j_id].kind) {
        .mapping => |first| first,
        else => return null,
    };

    var tag_text: ?[]const u8 = null;
    var value_text: ?[]const u8 = null;
    var members: usize = 0;
    while (member) |id| {
        const pair = ja.nodes[id].kind.keyvalue;
        const name = ja.nodes[pair.key].kind.string;
        const text = switch (ja.nodes[pair.value].kind) {
            .string => |s| s,
            else => return null, // a non-string member: this is a real table
        };
        members += 1;
        if (std.mem.eql(u8, name, "type")) tag_text = text;
        if (std.mem.eql(u8, name, "value")) value_text = text;
        member = ja.nodes[id].next_sibling;
    }

    if (members != 2) return null;
    const tag_name = tag_text orelse return null;
    const value = value_text orelse return null;
    const tag = std.meta.stringToEnum(TagType, tag_name) orelse return null;
    return .{ .tag = tag, .value = value };
}

/// Compare the TOML node `t_id` (in `ta`) against the expected JSON node `j_id`
/// (in `ja`). A tagged leaf is compared as a scalar; otherwise a JSON object is
/// compared as a table and a JSON array as an array. Any other JSON node kind
/// never matches.
fn matchValue(ta: *const AST, t_id: AST.Node.Id, ja: *const AST, j_id: AST.Node.Id) bool {
    const leaf = asLeaf(ja, j_id) orelse return switch (ja.nodes[j_id].kind) {
        .mapping => matchTable(ta, t_id, ja, j_id),
        .sequence => matchArray(ta, t_id, ja, j_id),
        else => false,
    };
    return matchLeaf(ta, t_id, leaf);
}

/// Compare a TOML table against an expected JSON object, ignoring member order.
/// Every JSON member must have a same-named TOML child that matches, and the
/// two sides must have the same number of members.
fn matchTable(ta: *const AST, t_id: AST.Node.Id, ja: *const AST, j_id: AST.Node.Id) bool {
    const table = ta.nodes[t_id];
    switch (table.kind) {
        .mapping => {},
        else => return false,
    }

    var expected_members: usize = 0;
    var cursor = ja.nodes[j_id].kind.mapping;
    while (cursor) |member_id| {
        const pair = ja.nodes[member_id].kind.keyvalue;
        const key = ja.nodes[pair.key].kind.string;
        const actual = childByKey(ta, table, key) orelse return false;
        if (!matchValue(ta, actual, ja, pair.value)) return false;
        expected_members += 1;
        cursor = ja.nodes[member_id].next_sibling;
    }
    // Equal counts rule out TOML keys that the expected JSON does not have.
    return countChildren(ta, table) == expected_members;
}

/// Compare a TOML array against an expected JSON array element by element, in
/// order. The arrays match only when they have the same length and every pair
/// of elements matches.
fn matchArray(ta: *const AST, t_id: AST.Node.Id, ja: *const AST, j_id: AST.Node.Id) bool {
    var actual = switch (ta.nodes[t_id].kind) {
        .sequence => |first| first,
        else => return false,
    };
    var expected = ja.nodes[j_id].kind.sequence;
    while (actual) |actual_id| {
        // TOML has more elements than the expected JSON.
        const expected_id = expected orelse return false;
        if (!matchValue(ta, actual_id, ja, expected_id)) return false;
        actual = ta.nodes[actual_id].next_sibling;
        expected = ja.nodes[expected_id].next_sibling;
    }
    // TOML is exhausted; the JSON side must be exhausted too.
    return expected == null;
}

/// Return the value node of the first member of `mapping` whose key equals
/// `key`, or null when no member has that key. `mapping` must be a mapping node.
fn childByKey(ast: *const AST, mapping: AST.Node, key: []const u8) ?AST.Node.Id {
    var cursor = mapping.kind.mapping;
    while (cursor) |id| {
        const pair = ast.nodes[id].kind.keyvalue;
        const name = ast.nodes[pair.key].kind.string;
        if (std.mem.eql(u8, name, key)) return pair.value;
        cursor = ast.nodes[id].next_sibling;
    }
    return null;
}

/// Count the direct children of a mapping or sequence node by walking its
/// sibling chain. Any other node kind has zero children.
fn countChildren(ast: *const AST, node: AST.Node) usize {
    const head = switch (node.kind) {
        .mapping, .sequence => |first| first,
        else => return 0,
    };
    var total: usize = 0;
    var cursor = head;
    while (cursor) |id| {
        total += 1;
        cursor = ast.nodes[id].next_sibling;
    }
    return total;
}

/// Compare the TOML scalar at `t_id` against an expected tagged leaf. The node
/// must have the kind the tag calls for, and its value must equal the expected
/// string after the per-type normalization done by `intEqual`, `floatEqual`
/// and `datetimeEqual`.
fn matchLeaf(ta: *const AST, t_id: AST.Node.Id, leaf: Leaf) bool {
    const kind = ta.nodes[t_id].kind;

    // Plain scalars are decided here; the four date/time tags fall through.
    switch (leaf.tag) {
        .string => return switch (kind) {
            .string => |text| std.mem.eql(u8, text, leaf.value),
            else => false,
        },
        .bool => return switch (kind) {
            .boolean => |flag| std.mem.eql(u8, if (flag) "true" else "false", leaf.value),
            else => false,
        },
        .integer => return switch (kind) {
            .number => |num| num.kind == .integer and intEqual(num.raw, leaf.value),
            else => false,
        },
        .float => return switch (kind) {
            .number => |num| num.kind == .float and floatEqual(num.raw, leaf.value),
            else => false,
        },
        .datetime, .@"datetime-local", .@"date-local", .@"time-local" => {},
    }

    // All date/time tags are `.extended` nodes; only the sub-kind differs.
    const ext = switch (kind) {
        .extended => |e| e,
        else => return false,
    };
    const kind_ok = switch (leaf.tag) {
        .datetime => ext.kind == .offset_datetime,
        .@"datetime-local" => ext.kind == .local_datetime,
        .@"date-local" => ext.kind == .local_date,
        .@"time-local" => ext.kind == .local_time,
        // Already returned from the switch above.
        .string, .bool, .integer, .float => unreachable,
    };
    return kind_ok and datetimeEqual(ext.text, leaf.value);
}

/// Normalize a TOML integer (any radix, underscores, sign) to a decimal string
/// and compare it to the expected decimal.
///
/// `raw` is the integer as written in the TOML source; `expected` is the
/// canonical decimal text. Returns false when `raw` does not parse as an `i64`.
///
/// `raw` is handed to `std.fmt.parseInt` directly rather than copied into a
/// fixed scratch buffer first. `parseInt` skips `_` separators itself, and a
/// fixed buffer would cap the literal length: a full-width binary `i64`
/// ("0b" plus 63 digits, more with leading zeros) does not fit in 64 bytes.
fn intEqual(raw: []const u8, expected: []const u8) bool {
    // Base 0 auto-detects a 0x/0o/0b prefix and accepts a leading sign.
    const v = std.fmt.parseInt(i64, raw, 0) catch return false;
    // 32 bytes is ample: the longest i64 in decimal is 20 characters.
    var out: [32]u8 = undefined;
    const s = std.fmt.bufPrint(&out, "{d}", .{v}) catch return false;
    return std.mem.eql(u8, s, expected);
}

/// Compare a TOML float literal with the expected float text by value. Both
/// sides go through `parseTomlFloat`; if either fails to parse the result is
/// false. NaN equals NaN here; otherwise ordinary `==` on `f64` decides.
fn floatEqual(raw: []const u8, expected: []const u8) bool {
    const got = parseTomlFloat(raw) orelse return false;
    const want = parseTomlFloat(expected) orelse return false;

    const got_nan = std.math.isNan(got);
    const want_nan = std.math.isNan(want);
    // `==` is false for NaN, so NaN must be matched explicitly.
    if (got_nan or want_nan) return got_nan and want_nan;
    return got == want;
}

/// Parse a TOML float literal into an `f64`. Underscores are dropped first;
/// `inf`/`nan` with an optional sign are recognized by name; everything else
/// goes to `std.fmt.parseFloat`. Returns null when the literal is longer than
/// 64 bytes after dropping underscores, or when it does not parse.
fn parseTomlFloat(s: []const u8) ?f64 {
    var scratch: [64]u8 = undefined;
    var used: usize = 0;
    for (s) |byte| {
        if (byte != '_') {
            if (used == scratch.len) return null;
            scratch[used] = byte;
            used += 1;
        }
    }
    const text = scratch[0..used];

    if (eqAny(text, &.{ "nan", "+nan", "-nan" })) return std.math.nan(f64);
    if (eqAny(text, &.{ "inf", "+inf" })) return std.math.inf(f64);
    if (std.mem.eql(u8, text, "-inf")) return -std.math.inf(f64);
    return std.fmt.parseFloat(f64, text) catch null;
}

/// Compare two date/time strings after bringing both to one canonical form:
/// missing seconds filled in (`insertSeconds`), then separator, letter case and
/// fractional-second width normalized (`normalizeDatetime`). Returns false if
/// either side does not fit the 64-byte normalization buffer.
fn datetimeEqual(raw: []const u8, expected: []const u8) bool {
    // TOML 1.1 times may omit seconds (`13:37`); toml-test's canonical value
    // always has them (`13:37:00`). Pad each side before normalizing it.
    var raw_padded: [64]u8 = undefined;
    var raw_normal: [64]u8 = undefined;
    const lhs = normalizeDatetime(insertSeconds(raw, &raw_padded), &raw_normal) orelse return false;

    var expected_padded: [64]u8 = undefined;
    var expected_normal: [64]u8 = undefined;
    const rhs = normalizeDatetime(insertSeconds(expected, &expected_padded), &expected_normal) orelse return false;

    return std.mem.eql(u8, lhs, rhs);
}

/// Return `s` with `:00` inserted after `HH:MM` when the seconds field is
/// absent. `s` is a datetime or a time: the time part is taken to start at
/// index 0 when `s[2]` is `:` (time-only), otherwise at index 11 (after the
/// date and its separator).
///
/// The padded text is written to `buf` and a slice of `buf` is returned. `s`
/// itself is returned unchanged when it is too short to hold `HH:MM`, already
/// has seconds, or the padded result would not fit in `buf`.
fn insertSeconds(s: []const u8, buf: []u8) []const u8 {
    const time_only = s.len >= 3 and s[2] == ':';
    if (!time_only and s.len < 11) return s;

    // Index just past `HH:MM`.
    const split = (if (time_only) @as(usize, 0) else 11) + 5;
    if (s.len < split) return s;
    if (s.len > split and s[split] == ':') return s; // seconds already present

    const padded_len = s.len + 3;
    if (padded_len > buf.len) return s;

    const head = s[0..split];
    const tail = s[split..];
    @memcpy(buf[0..head.len], head);
    @memcpy(buf[head.len..][0..3], ":00");
    @memcpy(buf[head.len + 3 ..][0..tail.len], tail);
    return buf[0..padded_len];
}

/// Write a canonical form of the date/time `s` into `buf` and return the
/// written slice, or null when `buf` is too small.
///
/// Three things are normalized: a space at index 10 (the date/time separator)
/// becomes `T`; ASCII letters are upper-cased (`t`/`z` → `T`/`Z`); and each
/// fractional-seconds run is cut or zero-padded to exactly three digits
/// (toml-test's canonical millisecond precision: `.6` → `.600`, `.123456` →
/// `.123`). Callers apply this to both sides so they compare equal.
fn normalizeDatetime(s: []const u8, buf: []u8) ?[]const u8 {
    var out_len: usize = 0;
    var pos: usize = 0;
    while (pos < s.len) {
        if (out_len == buf.len) return null;
        const byte = s[pos];
        pos += 1;

        if (byte != '.') {
            // `pos` is already one past `byte`, so index 10 shows up as 11.
            const is_separator = byte == ' ' and pos == 11;
            buf[out_len] = if (is_separator) 'T' else std.ascii.toUpper(byte);
            out_len += 1;
            continue;
        }

        buf[out_len] = '.';
        out_len += 1;

        // Consume the whole digit run, then emit exactly three digits of it.
        const digits_start = pos;
        while (pos < s.len and std.ascii.isDigit(s[pos])) pos += 1;
        const digits = s[digits_start..pos];
        for (0..3) |k| {
            if (out_len == buf.len) return null;
            buf[out_len] = if (k < digits.len) digits[k] else '0';
            out_len += 1;
        }
    }
    return buf[0..out_len];
}

/// Report whether `s` is byte-for-byte equal to any entry of `options`.
fn eqAny(s: []const u8, options: []const []const u8) bool {
    for (options) |candidate| {
        if (std.mem.eql(u8, candidate, s)) return true;
    }
    return false;
}