dbmd-cli 0.2.3

The `dbmd` command-line tool for db.md — the open database in plain files. A thin wrapper over dbmd-core: validate, search, query, graph, write, index, and log over a db.md store. Zero AI dependencies.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
//! End-to-end **designed-to-fail** integration test for `corpus-b-edges`,
//! driving the **real `dbmd` binary** as a subprocess (`assert_cmd`) — not
//! in-process library calls. This is the negative-path twin of
//! `e2e_corpus_a.rs`: where corpus-a pins "a clean store validates clean", this
//! pins "a deliberately-broken store reports EXACTLY the seeded issues, and the
//! write surfaces refuse exactly the seeded policy violations without mutating
//! the store."
//!
//! What it asserts:
//!
//!   1. **`validate --all --json corpus-b` equals `EXPECTED/validate.json`**,
//!      issue-for-issue — every seeded breakage surfaces with the correct
//!      `code` / `severity` / `file` / `line` / `key` / `related`, with no extra
//!      and no missing issues, the `summary` tallies match, and the process exits
//!      non-zero (`6`, errors present). Comparison is order-independent (the
//!      golden documents a `(file, line, code)` sort, but the contract is the
//!      SET of issues, not their emission order).
//!   2. **Each `EXPECTED/policy-refusal/<scenario>.json`** — `write` (existing +
//!      nonexistent frozen target), `fm set`, `rename`, `link` — refuses with the
//!      structured `POLICY_FROZEN_PAGE` error, exits non-zero, and leaves the
//!      corpus byte-for-byte unchanged (and never creates the would-be file). Run
//!      against a TEMP COPY so the committed fixture is never mutated.
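//!   3. **`EXPECTED/not-a-store.json`** — pointing `validate` at the no-`DB.md`
//!      sibling surfaces exactly one `NOT_A_STORE` issue and exits non-zero, and
//!      the `--all` sweep on the store proper does NOT descend into it.
//!   3b. **`EXPECTED/bad-db-md.json`** — pointing `validate --all` at the
//!      `bad-db-md/` sub-store surfaces exactly the three `DB_MD_*` codes and
//!      exits `6`, and the `--all` sweep on the store proper does NOT descend
//!      into it either.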
//!   4. **`EXPECTED/validate.json` is intent-derived, not a snapshot** — its
//!      `_comment` declares hand-derivation; every code it emits is mapped in the
//!      committed `EXPECTED/coverage.json`; that coverage map is a subset of the
//!      `SPEC.md § Validation` code table (no invented codes); `coverage.json`'s
//!      bookkeeping (`spec_code_count`, `all_spec_codes_covered`,
//!      `uncovered_spec_codes`) is checked against the live SPEC table so it can
//!      never over-claim coverage; and every issue names a distinct designed
//!      fixture site. A golden produced by dumping tool output would satisfy none
//!      of these structural properties.
//!
//! The goldens are committed and hand-derived from `SPEC.md § Validation`; this
//! test is their executable contract. Run after any change that could move
//! validate / write-policy behavior:
//! `cargo test -p dbmd-cli --test e2e_corpus_b`.

mod common;

use std::collections::{BTreeMap, BTreeSet};
use std::path::{Path, PathBuf};

use common::{copy_store_to_temp, corpus_b, corpus_b_expected, dbmd};

// ─────────────────────────────────────────────────────────────────────────────
// Issue model — the comparable projection of one validate issue object.
// ─────────────────────────────────────────────────────────────────────────────

/// The fields of a validate issue this test holds the engine to: everything in
/// the `EXPECTED/validate.json` issue shape except the free-text `message` /
/// `suggestion` prose. `related` is normalized to a sorted list so the
/// comparison is order-independent (the golden lists a stable order, but "the
/// partner files involved" is a set, not a sequence).
///
/// The derived `Ord` is what lets these keys be collected into a `BTreeSet`
/// and diffed with `difference`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct IssueKey {
    /// The issue's `severity` string (required).
    severity: String,
    /// The issue's `code` string (required).
    code: String,
    /// The issue's `file` string (required).
    file: String,
    /// The issue's `line`; `None` when the field is absent or not an integer
    /// (e.g. JSON `null`).
    line: Option<i64>,
    /// The issue's `key`; `None` when the field is absent or not a string.
    key: Option<String>,
    /// The string entries of the issue's `related` array, sorted. Empty when
    /// the field is absent or not an array; non-string entries are dropped.
    related: Vec<String>,
}

impl IssueKey {
    /// Build the comparable projection of a single issue object.
    ///
    /// `severity`, `code` and `file` are required strings (a missing one
    /// panics via `str_field`). `line` and `key` become `None` when absent or
    /// of another JSON type. `related` keeps only its string entries and is
    /// sorted, so two issues naming the same partners in a different order
    /// compare equal.
    fn from_json(v: &serde_json::Value) -> Self {
        // Collect the string entries of `related`; anything that is not an
        // array counts as "no related files".
        let mut related: Vec<String> = match v.get("related").and_then(|r| r.as_array()) {
            Some(entries) => entries
                .iter()
                .filter_map(|entry| entry.as_str())
                .map(String::from)
                .collect(),
            None => Vec::new(),
        };
        related.sort();

        let severity = str_field(v, "severity");
        let code = str_field(v, "code");
        let file = str_field(v, "file");
        let line = v.get("line").and_then(|n| n.as_i64());
        let key = v
            .get("key")
            .and_then(|name| name.as_str())
            .map(String::from);

        Self {
            severity,
            code,
            file,
            line,
            key,
            related,
        }
    }
}

/// Fetch the string stored under `field` in an issue object.
///
/// # Panics
/// Panics, quoting the field name and the whole object, when the field is
/// missing or is not a JSON string — the contract gives every issue object a
/// `severity` / `code` / `file`.
fn str_field(v: &serde_json::Value, field: &str) -> String {
    match v.get(field).and_then(|x| x.as_str()) {
        Some(text) => text.to_string(),
        None => panic!("issue object missing string field `{field}`: {v}"),
    }
}

/// Project a slice of issue objects (the `issues` array of a validate
/// envelope) into the comparable set of [`IssueKey`]s. Issue objects that
/// project to the same key collapse into one entry.
fn issue_set(issues: &[serde_json::Value]) -> BTreeSet<IssueKey> {
    let mut keys = BTreeSet::new();
    for issue in issues {
        keys.insert(IssueKey::from_json(issue));
    }
    keys
}

// ─────────────────────────────────────────────────────────────────────────────
// 1 — validate --all equals EXPECTED/validate.json, issue-for-issue, exit 6
// ─────────────────────────────────────────────────────────────────────────────

#[test]
fn validate_all_matches_expected_golden_issue_for_issue_and_exits_six() {
    // Sweep the committed designed-to-fail store with the real binary. The
    // store contains errors, so the run must fail with exit status 6
    // (ExitCode::ValidationFailed).
    let output = dbmd()
        .args(["--json", "validate", "--all"])
        .arg(corpus_b())
        .assert()
        .failure()
        .code(6)
        .get_output()
        .clone();
    let raw = String::from_utf8(output.stdout).unwrap();
    let report: serde_json::Value =
        serde_json::from_str(&raw).expect("validate --all emits a JSON envelope");

    // The hand-derived golden the report is held to.
    let golden: serde_json::Value = read_json(&corpus_b_expected("validate.json"));

    // ── scope + summary tallies ───────────────────────────────────────────────
    assert_eq!(report["scope"], "all", "`--all` is the full-sweep scope");
    for k in ["errors", "warnings", "info", "total"] {
        let got_tally = &report["summary"][k];
        let want_tally = &golden["summary"][k];
        assert_eq!(
            got_tally, want_tally,
            "summary.{k} must equal the golden ({} vs {})",
            got_tally, want_tally
        );
    }

    // The tallies must also be internally consistent, with at least one error
    // (which is what drives the non-zero exit).
    let errors = u64_at(&report, "errors");
    let warnings = u64_at(&report, "warnings");
    let infos = u64_at(&report, "info");
    let total = u64_at(&report, "total");
    assert_eq!(
        errors + warnings + infos,
        total,
        "summary tallies are self-consistent"
    );
    assert!(
        errors > 0,
        "the designed-to-fail store has errors (⇒ non-zero exit)"
    );

    // ── the issue SET equals the golden, exactly ─────────────────────────────
    let emitted = report["issues"].as_array().expect("issues is an array");
    let got = issue_set(emitted);
    let expected = golden["issues"]
        .as_array()
        .expect("golden issues is an array");
    let want = issue_set(expected);

    let missing: Vec<&IssueKey> = want.difference(&got).collect();
    let extra: Vec<&IssueKey> = got.difference(&want).collect();
    assert!(
        missing.is_empty() && extra.is_empty(),
        "validate --all must emit EXACTLY the golden issue set.\n\
         MISSING (in EXPECTED, not emitted): {missing:#?}\n\
         EXTRA (emitted, not in EXPECTED): {extra:#?}"
    );

    // A set hides duplicates, so the raw array lengths must agree as well.
    assert_eq!(
        emitted.len(),
        expected.len(),
        "no duplicate / dropped issues vs the golden array length"
    );

    // ── per-code multiplicity equals the golden ──────────────────────────────
    // (e.g. SCHEMA_SHAPE_MISMATCH fires exactly twice: email + date.)
    assert_eq!(
        code_histogram(emitted),
        code_histogram(expected),
        "the per-code issue counts must match the golden exactly"
    );

    // ── the layer-appropriate-type fixture fires exactly one warning ─────────
    // A `type: contact` page filed under `wiki/` (canonical layer: `records/`)
    // is valid-but-unusual: exactly one `LAYER_TYPE_MISMATCH`, reported as a
    // warning on that file and keyed on `type` — a convention nudge, not a
    // hard block.
    let layer_issues: Vec<&serde_json::Value> = emitted
        .iter()
        .filter(|candidate| candidate["code"] == "LAYER_TYPE_MISMATCH")
        .collect();
    assert_eq!(
        layer_issues.len(),
        1,
        "exactly one LAYER_TYPE_MISMATCH in the sweep: {layer_issues:#?}"
    );
    let layer_issue = layer_issues[0];
    assert_eq!(
        layer_issue["severity"], "warning",
        "layer mismatch is a warning, not an error"
    );
    assert_eq!(
        layer_issue["file"], "wiki/contacts/misplaced-contact.md",
        "the misplaced contact under wiki/ is the fixture"
    );
    assert_eq!(
        layer_issue["key"], "type",
        "the issue is keyed on the `type` field"
    );

    // ── every emitted issue carries the full contract shape ──────────────────
    const CONTRACT_FIELDS: [&str; 7] = [
        "severity", "code", "file", "line", "key", "message", "related",
    ];
    for issue in emitted {
        for field in CONTRACT_FIELDS {
            assert!(
                issue.get(field).is_some(),
                "every issue object has the `{field}` key (null allowed for line/key): {issue}"
            );
        }
        let sev = issue["severity"].as_str().unwrap();
        assert!(
            ["error", "warning", "info"].contains(&sev),
            "severity is one of the three words, got {sev:?}"
        );
    }
}

/// Tally how many issues carry each `code`: `{code -> count}`. Issue objects
/// without a string `code` field are left out of the tally.
fn code_histogram(issues: &[serde_json::Value]) -> BTreeMap<String, usize> {
    issues
        .iter()
        .filter_map(|issue| issue.get("code").and_then(|c| c.as_str()))
        .fold(BTreeMap::<String, usize>::new(), |mut counts, code| {
            *counts.entry(code.to_string()).or_insert(0) += 1;
            counts
        })
}

/// Read `summary.<key>` from a validate report as a `u64`.
///
/// # Panics
/// Panics when the value is missing or is not an unsigned integer.
fn u64_at(report: &serde_json::Value, key: &str) -> u64 {
    match report["summary"][key].as_u64() {
        Some(count) => count,
        None => panic!("summary.{key} is a number"),
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// 2 — policy-refusal scenarios refuse with the right error + leave files intact
// ─────────────────────────────────────────────────────────────────────────────

/// A committed `EXPECTED/policy-refusal/<scenario>.json` fixture: the exact
/// invocation, the expected structured error code, and the no-write contract.
#[derive(serde::Deserialize)]
struct PolicyRefusal {
    /// The command line to replay ("dbmd <args...> --json"); tokenized by
    /// `invocation_args` before being handed to the binary.
    invocation: String,
    /// The fixture's declaration that the command exits non-zero; the test
    /// requires this to be `true`.
    exit_code_nonzero: bool,
    /// The fixture's declaration that nothing is written; the test requires
    /// this to be `true`.
    no_write_occurred: bool,
    /// The structured error the refusal is expected to carry.
    error: PolicyError,
}

/// The `error` object of a policy-refusal fixture.
#[derive(serde::Deserialize)]
struct PolicyError {
    /// The expected error code; the test requires `POLICY_FROZEN_PAGE`.
    code: String,
    /// The frozen path the refusal must name (the fixture's `error.file`).
    /// The test joins it onto the store root, so it is store-relative.
    file: String,
}

/// Every write-surface refusal fixture committed under `policy-refusal/`.
///
/// Each entry is a file name resolved as `policy-refusal/<name>` under the
/// corpus-b `EXPECTED` directory; the policy-refusal test replays every one
/// against its own fresh temp copy of the store.
const POLICY_REFUSAL_FIXTURES: &[&str] = &[
    "write.json",
    "fm-set.json",
    "rename.json",
    "link.json",
    "write-nonexistent-frozen.json",
];

/// Split a fixture's `invocation` ("dbmd <args...> --json") into the argv the
/// real binary receives, dropping the leading `dbmd`. Every other token is
/// kept verbatim and in order, including the `--json` flag (it makes the error
/// structured on stderr). No `--dir` is added here or expected in the input:
/// the caller selects the store by running the binary with the store as its
/// working directory.
///
/// Tokenizes shell-style so a single-quoted argument with spaces (e.g.
/// `--summary 'overwrite attempt'`) becomes ONE argv element — the fixtures use
/// single quotes around multi-word `--summary` values. Only single quotes
/// group: there is no escape character and double quotes are ordinary
/// characters. A quoted run adjacent to unquoted text joins the same token,
/// and `''` yields a real empty argument.
///
/// # Panics
///
/// - If a single quote is never closed. Without this check the rest of the
///   invocation (including the trailing `--json`) would be silently absorbed
///   into one argument and surface later as a confusing, unrelated failure.
/// - If the first token is not `dbmd` (this includes an empty invocation).
fn invocation_args(invocation: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut cur = String::new();
    let mut in_quote = false;
    let mut started = false; // distinguishes '' (a real empty arg) from no-arg
    for c in invocation.chars() {
        match c {
            '\'' => {
                // The quote itself is not part of the token, but it does start
                // one, so `''` still produces an (empty) argument.
                in_quote = !in_quote;
                started = true;
            }
            c if c.is_whitespace() && !in_quote => {
                // Unquoted whitespace ends the current token, if there is one;
                // runs of whitespace produce no empty tokens.
                if started {
                    tokens.push(std::mem::take(&mut cur));
                    started = false;
                }
            }
            c => {
                cur.push(c);
                started = true;
            }
        }
    }
    assert!(
        !in_quote,
        "invocation has an unterminated single quote: {invocation:?}"
    );
    if started {
        tokens.push(cur);
    }
    assert_eq!(
        tokens.first().map(String::as_str),
        Some("dbmd"),
        "invocation starts with `dbmd`"
    );
    tokens.into_iter().skip(1).collect()
}

#[test]
fn policy_refusals_refuse_with_structured_error_and_do_not_write() {
    for fixture in POLICY_REFUSAL_FIXTURES {
        // Load the committed fixture and check its own declared contract
        // before running anything.
        let fixture_path = corpus_b_expected(&format!("policy-refusal/{fixture}"));
        let raw = match std::fs::read_to_string(fixture_path) {
            Ok(text) => text,
            Err(_) => panic!("EXPECTED/policy-refusal/{fixture} is committed"),
        };
        let golden: PolicyRefusal = match serde_json::from_str(&raw) {
            Ok(parsed) => parsed,
            Err(e) => panic!("policy-refusal/{fixture} is valid JSON: {e}"),
        };
        assert_eq!(
            golden.error.code, "POLICY_FROZEN_PAGE",
            "every policy-refusal fixture is a frozen-page refusal"
        );
        assert!(golden.exit_code_nonzero, "fixture contract");
        assert!(golden.no_write_occurred, "fixture contract");

        // Each scenario gets its own temp copy, so the committed corpus is
        // never the thing a misbehaving binary would mutate.
        let (_tmp, store) = copy_store_to_temp(&corpus_b());

        // Snapshot the frozen target before the run: its bytes, or `None` when
        // it is absent (one fixture targets a frozen path that does not exist
        // on disk, proving refusal is keyed on the policy path rather than on
        // file presence).
        let target_rel = golden.error.file.as_str();
        let frozen_path = store.join(target_rel);
        let snapshot_before = std::fs::read(&frozen_path).ok();

        // Replay the committed invocation verbatim with the temp store as the
        // working directory (rather than appending `--dir`, which not every
        // subcommand's positional parser accepts after its operands).
        let argv = invocation_args(&golden.invocation);
        let out = dbmd()
            .current_dir(&store)
            .args(&argv)
            .assert()
            .failure() // exit_code_nonzero
            .get_output()
            .clone();

        // ── structured error: code + the frozen path, on stderr under --json ──
        let stderr = String::from_utf8(out.stderr).unwrap();
        let err: serde_json::Value = match serde_json::from_str(stderr.trim()) {
            Ok(parsed) => parsed,
            Err(e) => panic!(
                "{fixture}: refusal must emit a JSON error on stderr: {e}\nstderr: {stderr}"
            ),
        };
        assert_eq!(
            err["error"]["code"], "POLICY_FROZEN_PAGE",
            "{fixture}: the refusal carries the structured POLICY_FROZEN_PAGE code, got {}",
            err["error"]
        );
        let msg = err["error"]["message"].as_str().unwrap_or("");
        assert!(
            msg.contains(target_rel),
            "{fixture}: the refusal message must name the frozen path {target_rel:?}; got {msg:?}"
        );

        // ── exit is the policy code (4), which is non-zero ────────────────────
        let exit_status = out.status.code().expect("process exited normally");
        assert_eq!(
            exit_status, 4,
            "{fixture}: a frozen-page refusal exits 4 (ExitCode::Policy)"
        );

        // ── no_write_occurred: the corpus file is byte-for-byte unchanged ─────
        let snapshot_after = std::fs::read(&frozen_path).ok();
        assert_eq!(
            snapshot_before,
            snapshot_after,
            "{fixture}: the frozen target {target_rel:?} must be byte-for-byte unchanged \
             (before-present={}, after-present={})",
            snapshot_before.is_some(),
            snapshot_after.is_some()
        );

        // When the frozen path did not exist beforehand, the refused write
        // must not have created it — neither at the requested path nor at a
        // sharded relocation (the `wiki-page` foldering would otherwise send
        // it to `wiki/topics/…`).
        if snapshot_before.is_none() {
            assert!(
                !frozen_path.exists(),
                "{fixture}: the refused nonexistent frozen path must NOT be created"
            );
            if let Some(leaf) = Path::new(target_rel).file_name() {
                let sharded = store.join("wiki/topics").join(leaf);
                assert!(
                    !sharded.exists(),
                    "{fixture}: the refused write must not slip through to a sharded location {:?}",
                    sharded
                );
            }
        }

        // ── the rest of the store is untouched ────────────────────────────────
        // Cheap proxy for "no side effects anywhere else": the temp copy still
        // holds the same number of `.md` files as the pristine corpus.
        let files_after = md_file_count(&store);
        let files_pristine = md_file_count(&corpus_b());
        assert_eq!(
            files_after, files_pristine,
            "{fixture}: a refusal must not add or remove any file in the store"
        );
    }
}

/// Number of `.md` files anywhere beneath `root`. A refusal must not change it.
///
/// Entries whose name starts with `.` are ignored entirely (dot-directories
/// are not entered, dot-files are not counted). Directories that cannot be
/// read and entries that fail to enumerate are skipped silently, so an
/// unreadable or missing `root` counts as zero.
fn md_file_count(root: &Path) -> usize {
    // Explicit work list of directories still to be scanned.
    let mut pending: Vec<PathBuf> = vec![root.to_path_buf()];
    let mut total = 0;
    while let Some(dir) = pending.pop() {
        let entries = match std::fs::read_dir(&dir) {
            Ok(entries) => entries,
            Err(_) => continue,
        };
        for entry in entries.flatten() {
            let raw_name = entry.file_name();
            // A non-UTF-8 name is treated as "": never hidden, never `.md`.
            let name = raw_name.to_str().unwrap_or("");
            if name.starts_with('.') {
                continue;
            }
            let path = entry.path();
            if path.is_dir() {
                pending.push(path);
            } else if name.ends_with(".md") {
                total += 1;
            }
        }
    }
    total
}

// ─────────────────────────────────────────────────────────────────────────────
// 3 — NOT_A_STORE: the no-DB.md sibling, and the sweep does not descend into it
// ─────────────────────────────────────────────────────────────────────────────

#[test]
fn not_a_store_sibling_is_one_issue_and_outside_the_sweep() {
    let golden: serde_json::Value = read_json(&corpus_b_expected("not-a-store.json"));
    assert!(
        golden["exit_code_nonzero"].as_bool() == Some(true),
        "the not-a-store fixture exits non-zero"
    );

    // Aim `validate` straight at the sibling directory that has no DB.md. The
    // outcome is reported as a validation issue (exit 6), not as a bare open
    // error.
    let sibling = corpus_b().join("not-a-store");
    let direct = dbmd()
        .args(["--json", "validate"])
        .arg(&sibling)
        .assert()
        .failure()
        .code(6)
        .get_output()
        .clone();
    let direct_stdout = String::from_utf8(direct.stdout).unwrap();
    let report: serde_json::Value = serde_json::from_str(&direct_stdout).unwrap();

    // Exactly one issue, matching the golden's NOT_A_STORE. The emitted `file`
    // is whatever path was passed (absolute here), while the golden records it
    // repo-relative because that is how the golden invocation was run. So only
    // the stable parts are compared — code / severity / line / key / related —
    // plus the fact that the path names the `not-a-store` directory.
    let issues = report["issues"].as_array().unwrap();
    assert_eq!(
        issues.len(),
        1,
        "exactly one issue for the no-store path: {issues:#?}"
    );
    let golden_issue = &golden["issues"].as_array().unwrap()[0];
    let emitted_issue = &issues[0];
    assert_eq!(
        emitted_issue["code"], golden_issue["code"],
        "code is NOT_A_STORE"
    );
    assert_eq!(emitted_issue["code"], "NOT_A_STORE");
    assert_eq!(emitted_issue["severity"], golden_issue["severity"]);
    assert_eq!(
        emitted_issue["line"], golden_issue["line"],
        "line is null"
    );
    assert_eq!(emitted_issue["key"], golden_issue["key"], "key is null");
    assert_eq!(
        emitted_issue["related"], golden_issue["related"],
        "related is empty"
    );

    // Both spellings of the path must end in the same `not-a-store` component.
    let golden_file = golden_issue["file"].as_str().unwrap();
    assert!(
        golden_file.ends_with("not-a-store"),
        "golden file names the sibling"
    );
    let emitted_file = emitted_issue["file"].as_str().unwrap().replace('\\', "/");
    assert!(
        emitted_file.ends_with("not-a-store"),
        "the emitted NOT_A_STORE file names the no-DB.md sibling, got {}",
        emitted_issue["file"]
    );

    // Finally, the `--all` sweep over corpus-b proper must stay out of the
    // non-canonical sibling: no NOT_A_STORE code and no issue whose file lives
    // under it.
    let swept = dbmd()
        .args(["--json", "validate", "--all"])
        .arg(corpus_b())
        .assert()
        .failure()
        .get_output()
        .clone();
    let swept_stdout = String::from_utf8(swept.stdout).unwrap();
    let sweep: serde_json::Value = serde_json::from_str(&swept_stdout).unwrap();
    for issue in sweep["issues"].as_array().unwrap() {
        assert_ne!(
            issue["code"], "NOT_A_STORE",
            "the store-proper sweep never emits NOT_A_STORE"
        );
        let file = issue["file"].as_str().unwrap_or("");
        assert!(
            !file.starts_with("not-a-store"),
            "the sweep must not descend into the non-canonical sibling, saw {file:?}"
        );
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// 3b — DB.md structure: the bad-db-md/ sub-store trips the three DB_MD_* codes
//      in a single SEPARATE invocation, and the corpus-b root sweep never
//      descends into it.
// ─────────────────────────────────────────────────────────────────────────────

#[test]
fn bad_db_md_substore_emits_the_three_db_md_codes_and_is_outside_the_sweep() {
    let golden: serde_json::Value = read_json(&corpus_b_expected("bad-db-md.json"));
    assert!(
        golden["exit_code_nonzero"].as_bool() == Some(true),
        "the bad-db-md fixture exits non-zero"
    );

    // Provenance: the golden's `_comment` must say it was hand-derived and
    // never copied from tool output.
    let provenance = golden["_comment"].as_str().unwrap_or("").to_lowercase();
    let declares_provenance = ["hand-derived", "never copied"]
        .iter()
        .all(|phrase| provenance.contains(*phrase));
    assert!(
        declares_provenance,
        "bad-db-md.json declares hand-derivation and that it is not copied from output"
    );

    // Run `validate --all` directly on the sub-store. Its DB.md is a valid
    // marker (the filename), so this is a real store whose IDENTITY contract
    // fails — exit 6 (ValidationFailed), not a bare open error.
    let substore = corpus_b().join("bad-db-md");
    let direct = dbmd()
        .args(["--json", "validate", "--all"])
        .arg(&substore)
        .assert()
        .failure()
        .code(6)
        .get_output()
        .clone();
    let direct_stdout = String::from_utf8(direct.stdout).unwrap();
    let report: serde_json::Value = serde_json::from_str(&direct_stdout).unwrap();

    // ── the issue SET equals the golden, exactly (the three DB_MD_* codes) ────
    let emitted = report["issues"].as_array().expect("issues is an array");
    let got = issue_set(emitted);
    let expected = golden["issues"].as_array().expect("golden issues array");
    let want = issue_set(expected);
    let missing: Vec<&IssueKey> = want.difference(&got).collect();
    let extra: Vec<&IssueKey> = got.difference(&want).collect();
    assert!(
        missing.is_empty() && extra.is_empty(),
        "bad-db-md validate must emit EXACTLY the golden issue set.\n\
         MISSING (in EXPECTED, not emitted): {missing:#?}\n\
         EXTRA (emitted, not in EXPECTED): {extra:#?}"
    );

    // Same per-code multiplicities as the golden (2 error + 1 warning), and
    // the distinct codes are exactly the three DB.md-structure ones.
    assert_eq!(
        code_histogram(emitted),
        code_histogram(expected),
        "per-code counts equal the golden"
    );
    let fired: BTreeSet<&str> = emitted
        .iter()
        .filter_map(|issue| issue["code"].as_str())
        .collect();
    let db_md_codes = BTreeSet::from([
        "DB_MD_BAD_TYPE",
        "DB_MD_MISSING_FIELD",
        "DB_MD_UNKNOWN_SECTION",
    ]);
    assert_eq!(
        fired, db_md_codes,
        "exactly the three DB.md-structure codes fire"
    );

    // Summary tallies match the golden (2 errors, 1 warning, 0 info, total 3).
    for k in ["errors", "warnings", "info", "total"] {
        assert_eq!(
            report["summary"][k], golden["summary"][k],
            "summary.{k} equals the golden"
        );
    }

    // The `--all` sweep over corpus-b proper must stay out of this sibling
    // sub-store: no DB_MD_* code at all (the sweep checks only the corpus-b
    // ROOT `DB.md`, which is clean) and no issue whose file is rooted in
    // `bad-db-md/`.
    let swept = dbmd()
        .args(["--json", "validate", "--all"])
        .arg(corpus_b())
        .assert()
        .failure()
        .get_output()
        .clone();
    let swept_stdout = String::from_utf8(swept.stdout).unwrap();
    let sweep: serde_json::Value = serde_json::from_str(&swept_stdout).unwrap();
    for issue in sweep["issues"].as_array().unwrap() {
        let code = issue["code"].as_str().unwrap_or("");
        assert!(
            !code.starts_with("DB_MD_"),
            "the corpus-b root sweep's DB.md is clean — no DB_MD_* code, saw {code}"
        );
        let file = issue["file"].as_str().unwrap_or("").replace('\\', "/");
        assert!(
            !file.starts_with("bad-db-md"),
            "the sweep must not descend into the bad-db-md sibling, saw {file:?}"
        );
    }
}

// ─────────────────────────────────────────────────────────────────────────────
// 4 — the golden is INTENT-DERIVED, not a snapshot of tool output
// ─────────────────────────────────────────────────────────────────────────────

/// Guards that the committed `validate.json` golden reads as a hand-derived
/// expectation rather than a dump of tool output.
///
/// The checks, in order: (a) the `_comment` provenance wording, (b) every
/// emitted code is mapped in `coverage.json`, (c/c2/c3) the coverage
/// bookkeeping agrees with the codes parsed out of `SPEC.md`, (d) issues are
/// spread over many files, (e) every issue has a non-empty `suggestion`, and
/// (f) every `DUP_*` issue lists at least one `related` partner.
#[test]
fn expected_validate_json_is_intent_derived_not_a_snapshot() {
    let golden: serde_json::Value = read_json(&corpus_b_expected("validate.json"));

    // (a) The golden declares its hand-derivation in `_comment` — a snapshot of
    //     tool output would carry no such provenance. The contract: it is
    //     derived from SPEC + what the corpus breaks, NEVER copied from output.
    //     A missing or non-string `_comment` is treated as empty and so fails
    //     the first assertion below.
    let comment = golden["_comment"].as_str().unwrap_or("");
    let lc = comment.to_lowercase();
    assert!(
        lc.contains("hand-derived") || lc.contains("intent-derived"),
        "EXPECTED/validate.json must declare hand/intent derivation in _comment, got {comment:?}"
    );
    // Any mention of "spec" satisfies this (which includes "spec.md"), so a
    // single substring test is sufficient.
    assert!(
        lc.contains("spec"),
        "the golden anchors itself to SPEC.md, got {comment:?}"
    );
    // Accepted phrasings: "never copied…", "never be copied…", or any comment
    // containing both "not" and "snapshot". The conjunction is parenthesised
    // so the grouping does not depend on `&&`/`||` precedence.
    assert!(
        lc.contains("never copied")
            || lc.contains("never be copied")
            || (lc.contains("not") && lc.contains("snapshot")),
        "the golden states it is not a snapshot of tool output, got {comment:?}"
    );

    // (b) Every code the golden emits is mapped in the committed coverage.json
    //     (each code → the fixture that seeds it). A code that fired by accident
    //     (a snapshot artifact) would be unmapped. `plan_extensions` is optional:
    //     when absent (or not an object) it contributes no keys.
    let coverage: serde_json::Value = read_json(&corpus_b_expected("coverage.json"));
    let mapped: BTreeSet<String> = coverage["coverage"]
        .as_object()
        .expect("coverage.coverage is an object")
        .keys()
        .cloned()
        .chain(
            coverage
                .get("plan_extensions")
                .and_then(|p| p.as_object())
                .into_iter()
                .flat_map(|o| o.keys().cloned()),
        )
        .collect();

    // The golden's issue list is consulted by (b), (d), (e) and (f); bind it
    // once so a malformed golden fails with a message that names the problem.
    let issues = golden["issues"]
        .as_array()
        .expect("golden validate.json declares an `issues` array");

    let emitted: BTreeSet<String> = issues
        .iter()
        .filter_map(|i| i["code"].as_str().map(String::from))
        .collect();
    let unmapped: Vec<&String> = emitted.difference(&mapped).collect();
    assert!(
        unmapped.is_empty(),
        "every code in the golden must be mapped to a fixture in coverage.json; unmapped: {unmapped:?}"
    );

    // (c) coverage.json's mapped codes are a SUBSET of the SPEC § Validation
    //     code table — the golden invents no codes. We read SPEC.md and pull
    //     every `| `CODE` |` row of the canonical-codes table.
    let spec_codes = spec_validation_codes();
    let invented: Vec<&String> = mapped.difference(&spec_codes).collect();
    assert!(
        invented.is_empty(),
        "coverage.json maps only real SPEC codes; not in the SPEC table: {invented:?}"
    );

    // (c2) coverage.json's bookkeeping cannot OVER-claim coverage. A SPEC code is
    //      "seeded" iff it is mapped to some fixture — whether under `coverage` or
    //      under `plan_extensions` (the latter just annotates a code as also
    //      plan-mandated; it does not make the code unseeded). So the seeded-SPEC
    //      set is `mapped ∩ spec_codes`, and the true gap is the rest of the SPEC
    //      table. The committed `uncovered_spec_codes` MUST equal that gap exactly
    //      (both directions), and `spec_code_count` / `all_spec_codes_covered` MUST
    //      agree with the SPEC table. This is the regression guard against a
    //      coverage.json that silently drops uncovered codes from the count and
    //      asserts full coverage.
    let seeded_spec: BTreeSet<String> = mapped.intersection(&spec_codes).cloned().collect();
    let true_uncovered: BTreeSet<String> = spec_codes.difference(&seeded_spec).cloned().collect();
    let declared_uncovered: BTreeSet<String> = coverage["uncovered_spec_codes"]
        .as_array()
        .expect("coverage.json declares uncovered_spec_codes (array)")
        .iter()
        .filter_map(|c| c.as_str().map(String::from))
        .collect();
    assert_eq!(
        declared_uncovered, true_uncovered,
        "coverage.json's uncovered_spec_codes must equal SPEC-codes minus seeded codes \
         exactly — no SPEC code may be silently dropped, and none falsely claimed uncovered"
    );
    // Compare as `u64`: widening the set length is lossless, whereas narrowing
    // the declared count to `usize` could truncate on a 32-bit target.
    let spec_code_count = coverage["spec_code_count"]
        .as_u64()
        .expect("coverage.json declares spec_code_count (number)");
    assert_eq!(
        spec_code_count,
        spec_codes.len() as u64,
        "coverage.json's spec_code_count must equal the real SPEC § Validation code count"
    );
    let all_covered = coverage["all_spec_codes_covered"]
        .as_bool()
        .expect("coverage.json declares all_spec_codes_covered (bool)");
    assert_eq!(
        all_covered,
        true_uncovered.is_empty(),
        "all_spec_codes_covered must be true iff every SPEC code is seeded \
         (uncovered: {true_uncovered:?})"
    );

    // (c3) The four Block-1 validate checks (DB.md identity/structure +
    //      layer-appropriate type) are REAL, not aspirational. The plan once
    //      drifted to claim these checks "await a SPEC code"; they have since
    //      landed (SPEC § Validation 40 → 44). This pins the substance that
    //      claim got wrong: each of the four codes MUST be a row in the live
    //      SPEC table AND seeded (mapped to a fixture) here — so a regression
    //      that drops a code from SPEC, or stops seeding it, turns this red with
    //      a code-named message rather than only nudging the aggregate counts.
    for code in [
        "DB_MD_BAD_TYPE",
        "DB_MD_MISSING_FIELD",
        "DB_MD_UNKNOWN_SECTION",
        "LAYER_TYPE_MISMATCH",
    ] {
        assert!(
            spec_codes.contains(code),
            "Block-1 validate code `{code}` must be a row in the live SPEC § Validation table \
             (these checks no longer 'await a SPEC code' — the SPEC defines them)"
        );
        assert!(
            seeded_spec.contains(code),
            "Block-1 validate code `{code}` must be seeded by a corpus-b fixture in coverage.json \
             (the DB.md-identity / layer-type checks are exercised, not just declared)"
        );
    }

    // (d) One designed breakage per fixture: the issues spread across MANY
    //     distinct fixture files (the breakage sites), not a handful — a clean
    //     one-issue-per-fixture structure a raw dump would not have. The golden
    //     seeds 39 issues across well over a dozen distinct files; the floor
    //     asserted here is 15 distinct `file` values.
    let distinct_files: BTreeSet<&str> = issues
        .iter()
        .filter_map(|i| i["file"].as_str())
        .collect();
    assert!(
        distinct_files.len() >= 15,
        "the breakages are spread across distinct designed fixtures (got {} files)",
        distinct_files.len()
    );

    // (e) Every issue carries a deterministic, non-empty `suggestion` — the
    //     hand-authored remediation hint. Only the `suggestion` invariant is
    //     asserted here; line anchoring of field-absent issues is NOT checked
    //     by this loop.
    for issue in issues {
        assert!(
            matches!(issue["suggestion"].as_str(), Some(s) if !s.is_empty()),
            "each golden issue has a deterministic remediation suggestion: {issue}"
        );
    }

    // (f) The dedup precedence (README rule #1): each DUP_* issue is reported
    //     ONCE with the colliding partner(s) in `related` — never duplicated per
    //     partner. So every DUP_* issue has a non-empty `related`. A missing or
    //     non-array `related` counts as zero partners.
    for issue in issues {
        let code = issue["code"].as_str().unwrap_or("");
        if code.starts_with("DUP_") {
            let related = issue["related"].as_array().map(|a| a.len()).unwrap_or(0);
            assert!(
                related >= 1,
                "{code} reports one issue with the partner in `related` (rule #1): {issue}"
            );
        }
    }
}

/// Collect the canonical issue codes listed in the repo-root `SPEC.md`.
///
/// Every line of the file is scanned (not just one section). A line
/// contributes a code when, after leading whitespace is stripped, it begins
/// with `| ` followed by a backtick-quoted token made up solely of ASCII
/// uppercase letters and underscores — i.e. the first cell of a
/// `| `CODE` | severity | … |` table row. This set is the independent source
/// of truth that the golden's coverage map is compared against.
///
/// # Panics
///
/// Panics if `SPEC.md` cannot be read, or if fewer than 30 codes are found
/// (a sanity floor that catches a table the scan failed to recognise).
fn spec_validation_codes() -> BTreeSet<String> {
    let spec_path = repo_root().join("SPEC.md");
    let text = std::fs::read_to_string(spec_path).expect("SPEC.md at repo root");

    // A code token is non-empty and consists only of `A`–`Z` and `_`.
    let looks_like_code =
        |token: &str| !token.is_empty() && token.chars().all(|ch| ch == '_' || ch.is_ascii_uppercase());

    let found: BTreeSet<String> = text
        .lines()
        // Keep only table rows whose first cell opens with a backtick.
        .filter_map(|row| row.trim_start().strip_prefix("| `"))
        // The token runs up to the closing backtick; rows without one are dropped.
        .filter_map(|after_tick| after_tick.split_once('`'))
        .map(|(token, _rest)| token)
        .filter(|&token| looks_like_code(token))
        .map(String::from)
        .collect();

    assert!(
        found.len() >= 30,
        "parsed the SPEC validation code table (got {} codes)",
        found.len()
    );
    found
}

// ─────────────────────────────────────────────────────────────────────────────
// helpers
// ─────────────────────────────────────────────────────────────────────────────

/// Read and parse a committed JSON golden file.
///
/// # Panics
///
/// Panics with a message naming `path` when the file cannot be read as text
/// (the underlying I/O error is included, so a permissions or encoding
/// failure is distinguishable from a genuinely absent file), or when its
/// contents do not parse as JSON.
fn read_json(path: &Path) -> serde_json::Value {
    let raw = std::fs::read_to_string(path).unwrap_or_else(|e| {
        panic!("committed golden is missing: {} ({e})", path.display())
    });
    serde_json::from_str(&raw)
        .unwrap_or_else(|e| panic!("golden {} is valid JSON: {e}", path.display()))
}

/// Path to the repository root, expressed relative to this crate's manifest
/// directory: `CARGO_MANIFEST_DIR` followed by two `..` components
/// (`crates/dbmd-cli` → `../..`). The path is not canonicalised. It is used
/// to locate `SPEC.md`, the independent code-table source.
fn repo_root() -> PathBuf {
    let mut root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    // Climb out of `crates/dbmd-cli`: one level per push.
    root.push("..");
    root.push("..");
    root
}