mod common;
use std::collections::{BTreeMap, BTreeSet};
use std::path::{Path, PathBuf};
use common::{copy_store_to_temp, corpus_b, corpus_b_expected, dbmd};
/// Identity of one validation issue, reduced to the fields that are compared
/// between an emitted report and a golden file.
///
/// Built by `IssueKey::from_json`. Fields such as `message` are not part of
/// the key. The derived `Ord` compares fields in declaration order, which is
/// what allows keys to be collected into a `BTreeSet`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct IssueKey {
/// The issue's `severity` string (required).
severity: String,
/// The issue's `code` string (required).
code: String,
/// The issue's `file` string (required).
file: String,
/// The issue's `line` when it is an integer; `None` otherwise.
line: Option<i64>,
/// The issue's `key` when it is a string; `None` otherwise.
key: Option<String>,
/// String entries of the issue's `related` array, sorted.
related: Vec<String>,
}
impl IssueKey {
    /// Builds the comparison key for one issue object.
    ///
    /// `severity`, `code` and `file` are required strings; a missing or
    /// non-string value panics inside `str_field`. `line` and `key` become
    /// `None` when absent or when the value is not an integer / string.
    /// `related` keeps only the string entries of the array and is sorted, so
    /// the key does not depend on the order in which related paths are listed;
    /// an absent or non-array `related` yields an empty list.
    fn from_json(v: &serde_json::Value) -> Self {
        let mut related: Vec<String> =
            match v.get("related").and_then(serde_json::Value::as_array) {
                Some(entries) => entries
                    .iter()
                    .filter_map(|entry| entry.as_str())
                    .map(str::to_owned)
                    .collect(),
                None => Vec::new(),
            };
        related.sort();

        let line = v.get("line").and_then(serde_json::Value::as_i64);
        let key = v
            .get("key")
            .and_then(serde_json::Value::as_str)
            .map(str::to_owned);

        Self {
            severity: str_field(v, "severity"),
            code: str_field(v, "code"),
            file: str_field(v, "file"),
            line,
            key,
            related,
        }
    }
}
fn str_field(v: &serde_json::Value, field: &str) -> String {
v.get(field)
.and_then(|x| x.as_str())
.unwrap_or_else(|| panic!("issue object missing string field `{field}`: {v}"))
.to_string()
}
/// Converts a slice of issue objects into an ordered set of `IssueKey`s.
///
/// Issues that reduce to the same key collapse into one entry, so callers that
/// care about duplicates compare array lengths separately.
fn issue_set(issues: &[serde_json::Value]) -> BTreeSet<IssueKey> {
    let mut keys = BTreeSet::new();
    for issue in issues {
        keys.insert(IssueKey::from_json(issue));
    }
    keys
}
/// Runs `dbmd --json validate --all` over corpus-b and compares the JSON
/// report with the committed golden `validate.json`.
///
/// Checked, in order:
/// 1. the process fails with exit code 6 and the report's `scope` is `"all"`;
/// 2. the four summary counters equal the golden and add up, with at least
///    one error;
/// 3. the issue set (as `IssueKey`s) equals the golden set, and the array
///    length and per-code counts match too;
/// 4. there is exactly one `LAYER_TYPE_MISMATCH`, a warning on the misplaced
///    contact, keyed on `type`;
/// 5. every issue object carries all seven envelope keys and a known severity.
#[test]
fn validate_all_matches_expected_golden_issue_for_issue_and_exits_six() {
    let output = dbmd()
        .args(["--json", "validate", "--all"])
        .arg(corpus_b())
        .assert()
        .failure()
        .code(6)
        .get_output()
        .clone();
    let stdout = String::from_utf8(output.stdout).unwrap();
    let report: serde_json::Value =
        serde_json::from_str(&stdout).expect("validate --all emits a JSON envelope");
    let golden: serde_json::Value = read_json(&corpus_b_expected("validate.json"));

    assert_eq!(report["scope"], "all", "`--all` is the full-sweep scope");

    // Summary counters: first against the golden, then for internal consistency.
    for k in ["errors", "warnings", "info", "total"] {
        let actual = &report["summary"][k];
        let expected = &golden["summary"][k];
        assert_eq!(
            actual, expected,
            "summary.{k} must equal the golden ({} vs {})",
            actual, expected
        );
    }
    let errors = u64_at(&report, "errors");
    let warnings = u64_at(&report, "warnings");
    let info = u64_at(&report, "info");
    let total = u64_at(&report, "total");
    assert_eq!(
        errors + warnings + info,
        total,
        "summary tallies are self-consistent"
    );
    assert!(
        errors > 0,
        "the designed-to-fail store has errors (⇒ non-zero exit)"
    );

    // Issue-for-issue comparison. The set difference names what is missing or
    // extra; the length and histogram checks catch duplicates that a set hides.
    let emitted = report["issues"].as_array().expect("issues is an array");
    let got = issue_set(emitted);
    let expected = golden["issues"]
        .as_array()
        .expect("golden issues is an array");
    let want = issue_set(expected);
    let missing: Vec<&IssueKey> = want.difference(&got).collect();
    let extra: Vec<&IssueKey> = got.difference(&want).collect();
    assert!(
        missing.is_empty() && extra.is_empty(),
        "validate --all must emit EXACTLY the golden issue set.\n\
         MISSING (in EXPECTED, not emitted): {missing:#?}\n\
         EXTRA (emitted, not in EXPECTED): {extra:#?}"
    );
    assert_eq!(
        emitted.len(),
        expected.len(),
        "no duplicate / dropped issues vs the golden array length"
    );
    assert_eq!(
        code_histogram(emitted),
        code_histogram(expected),
        "the per-code issue counts must match the golden exactly"
    );

    // The single layer/type mismatch fixture.
    let layer_issues: Vec<&serde_json::Value> = emitted
        .iter()
        .filter(|i| i["code"] == "LAYER_TYPE_MISMATCH")
        .collect();
    assert_eq!(
        layer_issues.len(),
        1,
        "exactly one LAYER_TYPE_MISMATCH in the sweep: {layer_issues:#?}"
    );
    let mismatch = layer_issues[0];
    assert_eq!(
        mismatch["severity"], "warning",
        "layer mismatch is a warning, not an error"
    );
    assert_eq!(
        mismatch["file"], "wiki/contacts/misplaced-contact.md",
        "the misplaced contact under wiki/ is the fixture"
    );
    assert_eq!(
        mismatch["key"], "type",
        "the issue is keyed on the `type` field"
    );

    // Envelope shape: every key is present (its value may be null) and the
    // severity is one of the three known words.
    const REQUIRED_FIELDS: [&str; 7] = [
        "severity", "code", "file", "line", "key", "message", "related",
    ];
    const SEVERITIES: [&str; 3] = ["error", "warning", "info"];
    for issue in emitted {
        for field in REQUIRED_FIELDS {
            assert!(
                issue.get(field).is_some(),
                "every issue object has the `{field}` key (null allowed for line/key): {issue}"
            );
        }
        let sev = issue["severity"].as_str().unwrap();
        assert!(
            SEVERITIES.contains(&sev),
            "severity is one of the three words, got {sev:?}"
        );
    }
}
fn code_histogram(issues: &[serde_json::Value]) -> BTreeMap<String, usize> {
let mut h = BTreeMap::new();
for i in issues {
if let Some(c) = i.get("code").and_then(|c| c.as_str()) {
*h.entry(c.to_string()).or_insert(0) += 1;
}
}
h
}
/// Reads `report["summary"][key]` as an unsigned integer.
///
/// # Panics
/// Panics when the value is absent or is not representable as a `u64`.
fn u64_at(report: &serde_json::Value, key: &str) -> u64 {
    match report["summary"][key].as_u64() {
        Some(count) => count,
        None => panic!("summary.{key} is a number"),
    }
}
/// One committed policy-refusal golden, deserialized from a
/// `policy-refusal/*.json` file under the corpus-b expectations directory.
#[derive(serde::Deserialize)]
struct PolicyRefusal {
/// Command line to replay; it is split by `invocation_args`, which requires
/// the first word to be `dbmd`.
invocation: String,
/// Fixture contract flag; the replaying test asserts it is `true`.
exit_code_nonzero: bool,
/// Fixture contract flag; the replaying test asserts it is `true`.
no_write_occurred: bool,
/// The structured error the refusal is expected to carry.
error: PolicyError,
}
/// The `error` object of a policy-refusal golden.
#[derive(serde::Deserialize)]
struct PolicyError {
/// Error code; the replaying test requires `POLICY_FROZEN_PAGE`.
code: String,
/// Path of the refused target; the test joins it onto the temporary store
/// root and expects it to appear in the refusal message.
file: String,
}
/// File names of the policy-refusal goldens replayed by
/// `policy_refusals_refuse_with_structured_error_and_do_not_write`; each one is
/// resolved as `policy-refusal/<name>` through `corpus_b_expected`.
const POLICY_REFUSAL_FIXTURES: &[&str] = &[
"write.json",
"fm-set.json",
"rename.json",
"link.json",
"write-nonexistent-frozen.json",
];
/// Splits a recorded `dbmd ...` command line into its arguments, dropping the
/// leading program name.
///
/// Words are separated by unquoted whitespace. A single quote toggles quoting
/// and is not itself part of the word, so `'a b'` is one argument and `''` is
/// an empty argument. No other quoting or escaping is interpreted: double
/// quotes and backslashes are ordinary characters. A quote left open at the
/// end of input simply ends the last word.
///
/// # Panics
/// Panics when the first word is not `dbmd` (including empty input).
fn invocation_args(invocation: &str) -> Vec<String> {
    let mut words: Vec<String> = Vec::new();
    // `Some` while a word is in progress; this lets an empty quoted word
    // (`''`) be told apart from "no word started yet".
    let mut pending: Option<String> = None;
    let mut quoted = false;

    for ch in invocation.chars() {
        if ch == '\'' {
            quoted = !quoted;
            pending.get_or_insert_with(String::new);
        } else if ch.is_whitespace() && !quoted {
            if let Some(word) = pending.take() {
                words.push(word);
            }
        } else {
            pending.get_or_insert_with(String::new).push(ch);
        }
    }
    // Flush the word that was still open when the input ended.
    words.extend(pending);

    assert_eq!(
        words.first().map(String::as_str),
        Some("dbmd"),
        "invocation starts with `dbmd`"
    );
    words.split_off(1)
}
/// Replays every policy-refusal golden against a fresh temporary copy of
/// corpus-b and checks that the command is refused without touching the store.
///
/// For each fixture the test asserts that:
/// - the golden itself names `POLICY_FROZEN_PAGE` and sets both contract flags;
/// - the command fails with exit code 4 and prints a JSON error on stderr
///   whose `error.code` is `POLICY_FROZEN_PAGE` and whose message contains the
///   target path;
/// - the target's bytes (or its absence) are the same before and after;
/// - a target that did not exist is created neither at its own path nor under
///   `wiki/topics/`;
/// - the number of visible `.md` files in the copy equals that of the
///   pristine corpus (see `md_file_count`).
#[test]
fn policy_refusals_refuse_with_structured_error_and_do_not_write() {
    for fixture in POLICY_REFUSAL_FIXTURES {
        // Load the golden and check its own contract before running anything.
        let golden_path = corpus_b_expected(&format!("policy-refusal/{fixture}"));
        let raw = std::fs::read_to_string(golden_path)
            .unwrap_or_else(|_| panic!("EXPECTED/policy-refusal/{fixture} is committed"));
        let golden: PolicyRefusal = serde_json::from_str(&raw)
            .unwrap_or_else(|e| panic!("policy-refusal/{fixture} is valid JSON: {e}"));
        assert_eq!(
            golden.error.code, "POLICY_FROZEN_PAGE",
            "every policy-refusal fixture is a frozen-page refusal"
        );
        assert!(
            golden.exit_code_nonzero && golden.no_write_occurred,
            "fixture contract"
        );

        // Snapshot the target in a throw-away copy of the store; `None` means
        // the target could not be read (e.g. it does not exist).
        let (_guard, store) = copy_store_to_temp(&corpus_b());
        let target_rel = &golden.error.file;
        let target_abs = store.join(target_rel);
        let before = std::fs::read(&target_abs).ok();

        let argv = invocation_args(&golden.invocation);
        let output = dbmd()
            .current_dir(&store)
            .args(&argv)
            .assert()
            .failure()
            .get_output()
            .clone();

        // Structured refusal on stderr.
        let stderr = String::from_utf8(output.stderr).unwrap();
        let err: serde_json::Value = match serde_json::from_str(stderr.trim()) {
            Ok(parsed) => parsed,
            Err(e) => panic!(
                "{fixture}: refusal must emit a JSON error on stderr: {e}\nstderr: {stderr}"
            ),
        };
        assert_eq!(
            err["error"]["code"], "POLICY_FROZEN_PAGE",
            "{fixture}: the refusal carries the structured POLICY_FROZEN_PAGE code, got {}",
            err["error"]
        );
        let msg = err["error"]["message"].as_str().unwrap_or("");
        assert!(
            msg.contains(target_rel),
            "{fixture}: the refusal message must name the frozen path {target_rel:?}; got {msg:?}"
        );
        let exit = output.status.code().expect("process exited normally");
        assert_eq!(
            exit, 4,
            "{fixture}: a frozen-page refusal exits 4 (ExitCode::Policy)"
        );

        // Nothing was written: same bytes, or still absent.
        let after = std::fs::read(&target_abs).ok();
        assert_eq!(
            before,
            after,
            "{fixture}: the frozen target {target_rel:?} must be byte-for-byte unchanged \
             (before-present={}, after-present={})",
            before.is_some(),
            after.is_some()
        );
        if before.is_none() {
            assert!(
                !target_abs.exists(),
                "{fixture}: the refused nonexistent frozen path must NOT be created"
            );
            if let Some(name) = Path::new(target_rel).file_name() {
                let sharded = store.join("wiki/topics").join(name);
                assert!(
                    !sharded.exists(),
                    "{fixture}: the refused write must not slip through to a sharded location {:?}",
                    sharded
                );
            }
        }

        let files_after = md_file_count(&store);
        let files_pristine = md_file_count(&corpus_b());
        assert_eq!(
            files_after, files_pristine,
            "{fixture}: a refusal must not add or remove any file in the store"
        );
    }
}
/// Counts the `.md` files beneath `root`, descending into subdirectories.
///
/// Entries whose name starts with `.` are skipped, and dot-directories are not
/// entered. Directories that cannot be listed and entries that cannot be read
/// are ignored rather than reported, so an unreadable or missing `root` counts
/// as zero.
fn md_file_count(root: &Path) -> usize {
    let mut total = 0;
    // Work-list of directories still to be listed.
    let mut pending: Vec<PathBuf> = vec![root.to_path_buf()];

    while let Some(dir) = pending.pop() {
        if let Ok(entries) = std::fs::read_dir(&dir) {
            for entry in entries.flatten() {
                let file_name = entry.file_name();
                // A name that is not valid UTF-8 is treated as the empty string.
                let name = file_name.to_str().unwrap_or("");
                if name.starts_with('.') {
                    continue;
                }
                let path = entry.path();
                if path.is_dir() {
                    pending.push(path);
                } else if name.ends_with(".md") {
                    total += 1;
                }
            }
        }
    }
    total
}
/// Validating the `not-a-store` directory inside corpus-b yields exactly one
/// `NOT_A_STORE` issue, and the corpus-b root sweep never reports it.
///
/// The first half runs `dbmd --json validate <corpus-b>/not-a-store`, expects
/// exit code 6 and compares the single issue's `code`, `severity`, `line`,
/// `key` and `related` with the golden `not-a-store.json`; the `file` only has
/// to end in `not-a-store` once backslashes are normalised to `/`. The second
/// half runs the `--all` sweep on the corpus-b root and requires that no issue
/// has the `NOT_A_STORE` code or a file path starting with `not-a-store`.
#[test]
fn not_a_store_sibling_is_one_issue_and_outside_the_sweep() {
    let golden: serde_json::Value = read_json(&corpus_b_expected("not-a-store.json"));
    assert!(
        golden["exit_code_nonzero"].as_bool() == Some(true),
        "the not-a-store fixture exits non-zero"
    );

    // Validate the sibling directly.
    let sibling = corpus_b().join("not-a-store");
    let output = dbmd()
        .args(["--json", "validate"])
        .arg(&sibling)
        .assert()
        .failure()
        .code(6)
        .get_output()
        .clone();
    let stdout = String::from_utf8(output.stdout).unwrap();
    let report: serde_json::Value = serde_json::from_str(&stdout).unwrap();
    let issues = report["issues"].as_array().unwrap();
    assert_eq!(
        issues.len(),
        1,
        "exactly one issue for the no-store path: {issues:#?}"
    );

    let golden_issue = &golden["issues"].as_array().unwrap()[0];
    let issue = &issues[0];
    assert_eq!(issue["code"], golden_issue["code"], "code is NOT_A_STORE");
    assert_eq!(issue["code"], "NOT_A_STORE");
    assert_eq!(issue["severity"], golden_issue["severity"]);
    assert_eq!(issue["line"], golden_issue["line"], "line is null");
    assert_eq!(issue["key"], golden_issue["key"], "key is null");
    assert_eq!(
        issue["related"], golden_issue["related"],
        "related is empty"
    );

    // Only the tail of `file` is compared, on both sides.
    let golden_file = golden_issue["file"].as_str().unwrap();
    assert!(
        golden_file.ends_with("not-a-store"),
        "golden file names the sibling"
    );
    let emitted_file = issue["file"].as_str().unwrap().replace('\\', "/");
    assert!(
        emitted_file.ends_with("not-a-store"),
        "the emitted NOT_A_STORE file names the no-DB.md sibling, got {}",
        issue["file"]
    );

    // The root sweep must not report the sibling at all.
    let sweep_output = dbmd()
        .args(["--json", "validate", "--all"])
        .arg(corpus_b())
        .assert()
        .failure()
        .get_output()
        .clone();
    let sweep_stdout = String::from_utf8(sweep_output.stdout).unwrap();
    let sweep: serde_json::Value = serde_json::from_str(&sweep_stdout).unwrap();
    for issue in sweep["issues"].as_array().unwrap() {
        assert_ne!(
            issue["code"], "NOT_A_STORE",
            "the store-proper sweep never emits NOT_A_STORE"
        );
        let file = issue["file"].as_str().unwrap_or("");
        assert!(
            !file.starts_with("not-a-store"),
            "the sweep must not descend into the non-canonical sibling, saw {file:?}"
        );
    }
}
/// Validating the `bad-db-md` directory inside corpus-b yields exactly the
/// golden issue set, made of the three `DB_MD_*` codes, and the corpus-b root
/// sweep reports none of it.
///
/// The golden `bad-db-md.json` must itself declare, in `_comment`, that it is
/// hand-derived and never copied. The sub-store run must exit 6 and match the
/// golden on issue set, per-code counts and summary counters. The root sweep
/// must contain no `DB_MD_*` code and no file path starting with `bad-db-md`.
#[test]
fn bad_db_md_substore_emits_the_three_db_md_codes_and_is_outside_the_sweep() {
    let golden: serde_json::Value = read_json(&corpus_b_expected("bad-db-md.json"));
    assert!(
        golden["exit_code_nonzero"].as_bool() == Some(true),
        "the bad-db-md fixture exits non-zero"
    );
    let comment = golden["_comment"].as_str().unwrap_or("").to_lowercase();
    let declares_provenance = ["hand-derived", "never copied"]
        .iter()
        .all(|phrase| comment.contains(phrase));
    assert!(
        declares_provenance,
        "bad-db-md.json declares hand-derivation and that it is not copied from output"
    );

    // Validate the sub-store on its own.
    let substore = corpus_b().join("bad-db-md");
    let output = dbmd()
        .args(["--json", "validate", "--all"])
        .arg(&substore)
        .assert()
        .failure()
        .code(6)
        .get_output()
        .clone();
    let stdout = String::from_utf8(output.stdout).unwrap();
    let report: serde_json::Value = serde_json::from_str(&stdout).unwrap();

    let emitted = report["issues"].as_array().expect("issues is an array");
    let got = issue_set(emitted);
    let expected = golden["issues"].as_array().expect("golden issues array");
    let want = issue_set(expected);
    let missing: Vec<&IssueKey> = want.difference(&got).collect();
    let extra: Vec<&IssueKey> = got.difference(&want).collect();
    assert!(
        missing.is_empty() && extra.is_empty(),
        "bad-db-md validate must emit EXACTLY the golden issue set.\n\
         MISSING (in EXPECTED, not emitted): {missing:#?}\n\
         EXTRA (emitted, not in EXPECTED): {extra:#?}"
    );
    assert_eq!(
        code_histogram(emitted),
        code_histogram(expected),
        "per-code counts equal the golden"
    );

    let codes: BTreeSet<&str> = emitted
        .iter()
        .filter_map(|i| i["code"].as_str())
        .collect();
    let three_db_md_codes = BTreeSet::from([
        "DB_MD_BAD_TYPE",
        "DB_MD_MISSING_FIELD",
        "DB_MD_UNKNOWN_SECTION",
    ]);
    assert_eq!(
        codes, three_db_md_codes,
        "exactly the three DB.md-structure codes fire"
    );

    for k in ["errors", "warnings", "info", "total"] {
        assert_eq!(
            report["summary"][k], golden["summary"][k],
            "summary.{k} equals the golden"
        );
    }

    // The root sweep must neither flag its own DB.md nor enter the sub-store.
    let sweep_output = dbmd()
        .args(["--json", "validate", "--all"])
        .arg(corpus_b())
        .assert()
        .failure()
        .get_output()
        .clone();
    let sweep_stdout = String::from_utf8(sweep_output.stdout).unwrap();
    let sweep: serde_json::Value = serde_json::from_str(&sweep_stdout).unwrap();
    for issue in sweep["issues"].as_array().unwrap() {
        let code = issue["code"].as_str().unwrap_or("");
        assert!(
            !code.starts_with("DB_MD_"),
            "the corpus-b root sweep's DB.md is clean — no DB_MD_* code, saw {code}"
        );
        let file = issue["file"].as_str().unwrap_or("").replace('\\', "/");
        assert!(
            !file.starts_with("bad-db-md"),
            "the sweep must not descend into the bad-db-md sibling, saw {file:?}"
        );
    }
}
/// Meta-test over the committed goldens: `validate.json` must declare its own
/// provenance, and `coverage.json` must agree with the codes parsed out of
/// `SPEC.md` by `spec_validation_codes`.
///
/// Checked, in order:
/// 1. `_comment` in `validate.json` mentions hand/intent derivation, mentions
///    the spec, and states that it is not copied from / a snapshot of output;
/// 2. every code used in the golden is a key of `coverage` or of the optional
///    `plan_extensions` object in `coverage.json`;
/// 3. every mapped code is a SPEC code;
/// 4. `uncovered_spec_codes`, `spec_code_count` and `all_spec_codes_covered`
///    equal what can be derived from the SPEC codes and the mapped codes;
/// 5. the four Block-1 codes are both SPEC codes and mapped;
/// 6. the golden's issues span at least 15 distinct files, each issue has a
///    non-empty `suggestion`, and each `DUP_*` issue has a non-empty `related`.
#[test]
fn expected_validate_json_is_intent_derived_not_a_snapshot() {
    let golden: serde_json::Value = read_json(&corpus_b_expected("validate.json"));
    let comment = golden["_comment"].as_str().unwrap_or("");
    let lc = comment.to_lowercase();
    assert!(
        lc.contains("hand-derived") || lc.contains("intent-derived"),
        "EXPECTED/validate.json must declare hand/intent derivation in _comment, got {comment:?}"
    );
    // Any mention of "spec" satisfies this check; a separate test for
    // "spec.md" would be redundant because that text already contains "spec".
    assert!(
        lc.contains("spec"),
        "the golden anchors itself to SPEC.md, got {comment:?}"
    );
    // "never copied from" needs no alternative of its own: it contains
    // "never copied". The `&&` pair is parenthesised to make the grouping
    // explicit.
    assert!(
        lc.contains("never copied")
            || lc.contains("never be copied")
            || (lc.contains("not") && lc.contains("snapshot")),
        "the golden states it is not a snapshot of tool output, got {comment:?}"
    );

    // Codes that coverage.json maps to a fixture: the `coverage` keys plus the
    // keys of `plan_extensions` when that object is present.
    let coverage: serde_json::Value = read_json(&corpus_b_expected("coverage.json"));
    let mut mapped: BTreeSet<String> = coverage["coverage"]
        .as_object()
        .expect("coverage.coverage is an object")
        .keys()
        .cloned()
        .collect();
    if let Some(extensions) = coverage
        .get("plan_extensions")
        .and_then(|p| p.as_object())
    {
        mapped.extend(extensions.keys().cloned());
    }

    let golden_issues = golden["issues"].as_array().unwrap();
    let emitted: BTreeSet<String> = golden_issues
        .iter()
        .filter_map(|i| i["code"].as_str().map(String::from))
        .collect();
    let unmapped: Vec<&String> = emitted.difference(&mapped).collect();
    assert!(
        unmapped.is_empty(),
        "every code in the golden must be mapped to a fixture in coverage.json; unmapped: {unmapped:?}"
    );

    let spec_codes = spec_validation_codes();
    let invented: Vec<&String> = mapped.difference(&spec_codes).collect();
    assert!(
        invented.is_empty(),
        "coverage.json maps only real SPEC codes; not in the SPEC table: {invented:?}"
    );

    // Cross-check coverage.json's self-reported bookkeeping.
    let seeded_spec: BTreeSet<String> = mapped.intersection(&spec_codes).cloned().collect();
    let true_uncovered: BTreeSet<String> = spec_codes.difference(&seeded_spec).cloned().collect();
    let declared_uncovered: BTreeSet<String> = coverage["uncovered_spec_codes"]
        .as_array()
        .expect("coverage.json declares uncovered_spec_codes (array)")
        .iter()
        .filter_map(|c| c.as_str().map(String::from))
        .collect();
    assert_eq!(
        declared_uncovered, true_uncovered,
        "coverage.json's uncovered_spec_codes must equal SPEC-codes minus seeded codes \
         exactly — no SPEC code may be silently dropped, and none falsely claimed uncovered"
    );
    let spec_code_count = coverage["spec_code_count"]
        .as_u64()
        .expect("coverage.json declares spec_code_count (number)");
    // Compare in u64: widening the set length cannot lose information, whereas
    // narrowing the declared count to usize could truncate on 32-bit targets.
    let parsed_code_count = spec_codes.len() as u64;
    assert_eq!(
        spec_code_count, parsed_code_count,
        "coverage.json's spec_code_count must equal the real SPEC § Validation code count"
    );
    let all_covered = coverage["all_spec_codes_covered"]
        .as_bool()
        .expect("coverage.json declares all_spec_codes_covered (bool)");
    assert_eq!(
        all_covered,
        true_uncovered.is_empty(),
        "all_spec_codes_covered must be true iff every SPEC code is seeded \
         (uncovered: {true_uncovered:?})"
    );

    for code in [
        "DB_MD_BAD_TYPE",
        "DB_MD_MISSING_FIELD",
        "DB_MD_UNKNOWN_SECTION",
        "LAYER_TYPE_MISMATCH",
    ] {
        assert!(
            spec_codes.contains(code),
            "Block-1 validate code `{code}` must be a row in the live SPEC § Validation table \
             (these checks no longer 'await a SPEC code' — the SPEC defines them)"
        );
        assert!(
            seeded_spec.contains(code),
            "Block-1 validate code `{code}` must be seeded by a corpus-b fixture in coverage.json \
             (the DB.md-identity / layer-type checks are exercised, not just declared)"
        );
    }

    // Shape of the golden itself.
    let distinct_files: BTreeSet<&str> = golden_issues
        .iter()
        .filter_map(|i| i["file"].as_str())
        .collect();
    assert!(
        distinct_files.len() >= 15,
        "the breakages are spread across distinct designed fixtures (got {} files)",
        distinct_files.len()
    );
    for issue in golden_issues {
        let has_suggestion = issue["suggestion"]
            .as_str()
            .map(|s| !s.is_empty())
            .unwrap_or(false);
        assert!(
            has_suggestion,
            "each golden issue has a deterministic remediation suggestion: {issue}"
        );
    }
    for issue in golden_issues {
        let code = issue["code"].as_str().unwrap_or("");
        if code.starts_with("DUP_") {
            let related = issue["related"].as_array().map(|a| a.len()).unwrap_or(0);
            assert!(
                related >= 1,
                "{code} reports one issue with the partner in `related` (rule #1): {issue}"
            );
        }
    }
}
/// Extracts validation codes from `SPEC.md` at the repository root.
///
/// A code is taken from every line that, after leading whitespace, starts with
/// "| `": the text up to the next backtick is kept when it is non-empty and
/// consists only of ASCII uppercase letters and underscores. Lines without a
/// closing backtick are ignored.
///
/// # Panics
/// Panics when `SPEC.md` cannot be read, or when fewer than 30 codes are found
/// (a guard against the table format changing under the parser).
fn spec_validation_codes() -> BTreeSet<String> {
    let spec = std::fs::read_to_string(repo_root().join("SPEC.md")).expect("SPEC.md at repo root");
    let codes: BTreeSet<String> = spec
        .lines()
        .filter_map(|line| line.trim_start().strip_prefix("| `"))
        .filter_map(|rest| rest.split_once('`'))
        .map(|(code, _)| code)
        .filter(|code| {
            !code.is_empty() && code.chars().all(|c| c.is_ascii_uppercase() || c == '_')
        })
        .map(str::to_owned)
        .collect();
    assert!(
        codes.len() >= 30,
        "parsed the SPEC validation code table (got {} codes)",
        codes.len()
    );
    codes
}
/// Reads and parses a committed golden JSON file.
///
/// # Panics
/// Panics when the file cannot be read or is not valid JSON. Both messages
/// include the path and the underlying error, so a permission or encoding
/// problem is not mistaken for a missing file.
fn read_json(path: &Path) -> serde_json::Value {
    let raw = std::fs::read_to_string(path).unwrap_or_else(|e| {
        panic!(
            "committed golden could not be read: {}: {e}",
            path.display()
        )
    });
    serde_json::from_str(&raw)
        .unwrap_or_else(|e| panic!("golden {} is valid JSON: {e}", path.display()))
}
/// Path two directory levels above this crate's manifest directory
/// (`CARGO_MANIFEST_DIR/../..`), where `spec_validation_codes` looks for
/// `SPEC.md`. The `..` components are appended as-is; the path is not
/// canonicalized.
fn repo_root() -> PathBuf {
    let mut root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    for _ in 0..2 {
        root.push("..");
    }
    root
}