mod common;
use std::collections::BTreeSet;
use std::path::Path;
use common::{
copy_store_to_temp, corpus_a, corpus_a_expected, dbmd, split_frontmatter_body, write_db_md,
write_file,
};
/// Maximum number of entries these tests expect a type-folder `index.md` to list.
/// A folder with more files than this must show exactly this many entries plus a
/// `## More` footer; a folder at or under it must list every file.
/// NOTE(review): presumably mirrors the cap built into `dbmd index rebuild` — confirm.
const INDEX_CAP: usize = 500;
/// `dbmd --json validate --all` over the canonical corpus must report zero
/// findings, and its issue list must equal the committed golden.
#[test]
fn validate_all_is_clean_and_matches_expected_golden() {
    let run = dbmd()
        .args(["--json", "validate", "--all"])
        .arg(corpus_a())
        .assert()
        .success();
    let raw_report = String::from_utf8(run.get_output().stdout.clone()).unwrap();
    let report: serde_json::Value =
        serde_json::from_str(&raw_report).expect("validate emits JSON");

    assert_eq!(report["scope"], "all", "`--all` is the full SWEEP scope");

    // Every severity counter, and their total, must be zero.
    let summary = &report["summary"];
    assert_eq!(summary["errors"], 0);
    assert_eq!(summary["warnings"], 0);
    assert_eq!(summary["info"], 0);
    assert_eq!(summary["total"], 0);

    // The golden is read only after the summary checks have passed.
    let golden_text = std::fs::read_to_string(corpus_a_expected("validate.json"))
        .expect("EXPECTED/validate.json is committed");
    let expected_issues: serde_json::Value =
        serde_json::from_str(&golden_text).expect("EXPECTED/validate.json is valid JSON");
    let no_issues = serde_json::json!([]);
    assert_eq!(
        expected_issues, no_issues,
        "the committed golden pins zero issues for the canonical store"
    );

    let actual_issues = &report["issues"];
    assert_eq!(
        *actual_issues, expected_issues,
        "validate --all issues must equal EXPECTED/validate.json: got {}",
        actual_issues
    );
}
/// One query case deserialized from the `queries` array of `EXPECTED/search.json`.
#[derive(serde::Deserialize)]
struct SearchCase {
/// Search term passed to `dbmd search` as its positional argument.
query: String,
/// Extra command-line arguments appended after the query; empty when the
/// golden omits the field.
#[serde(default)]
args: Vec<String>,
/// Files the search is expected to report, compared as a set.
matches: Vec<String>,
}
/// Top-level shape of `EXPECTED/search.json` as read by `search_golden`.
#[derive(serde::Deserialize)]
struct SearchGolden {
/// The individual query cases, in file order.
queries: Vec<SearchCase>,
}
/// Loads the search golden (`EXPECTED/search.json`) and returns its runnable cases.
///
/// Cases whose `query` is empty are skipped. The result is guaranteed to be
/// non-empty, so a caller that loops over it always exercises at least one
/// search.
///
/// # Panics
///
/// Panics when the golden file is missing or is not valid JSON of the expected
/// shape, when it contains no cases at all, or when every case it contains has
/// an empty query.
fn search_golden() -> Vec<SearchCase> {
    let raw = std::fs::read_to_string(corpus_a_expected("search.json"))
        .expect("EXPECTED/search.json is committed");
    let doc: SearchGolden = serde_json::from_str(&raw).expect("EXPECTED/search.json is valid JSON");
    assert!(
        !doc.queries.is_empty(),
        "the golden has at least one query case"
    );
    let cases: Vec<SearchCase> = doc
        .queries
        .into_iter()
        .filter(|c| !c.query.is_empty())
        .collect();
    // Re-check after filtering: otherwise a golden made up solely of empty
    // queries would hand back zero cases and the golden-driven test would pass
    // without running a single search.
    assert!(
        !cases.is_empty(),
        "the golden has at least one query case with a non-empty query"
    );
    cases
}
/// Runs `dbmd --json search <query> <args…> --dir <corpus>` for `case` and
/// returns the set of `file` values found in the JSON array it prints.
///
/// # Panics
///
/// Panics when the command fails, when stdout is not a JSON array, or when an
/// element has no string `file` field.
fn search_files_json(case: &SearchCase) -> BTreeSet<String> {
    let run = dbmd()
        .arg("--json")
        .arg("search")
        .arg(&case.query)
        .args(&case.args)
        .arg("--dir")
        .arg(corpus_a())
        .assert()
        .success();
    let raw = String::from_utf8(run.get_output().stdout.clone()).unwrap();
    let parsed: serde_json::Value = serde_json::from_str(&raw).expect("search --json is JSON");
    let hits = parsed.as_array().expect("search --json is an array");

    // Several hits may share one file; the set collapses them.
    let mut files = BTreeSet::new();
    for hit in hits {
        let file = hit["file"].as_str().expect("each match has a file");
        files.insert(file.to_string());
    }
    files
}
/// Runs `dbmd search <query> <args…> --dir <corpus>` (text mode) for `case` and
/// returns the set of file names, i.e. the part of each non-empty output line
/// that precedes the first `:`.
///
/// # Panics
///
/// Panics when the command fails or when a non-empty output line contains no `:`.
fn search_files_text(case: &SearchCase) -> BTreeSet<String> {
    let run = dbmd()
        .arg("search")
        .arg(&case.query)
        .args(&case.args)
        .arg("--dir")
        .arg(corpus_a())
        .assert()
        .success();
    let raw = String::from_utf8(run.get_output().stdout.clone()).unwrap();

    let mut files = BTreeSet::new();
    for line in raw.lines() {
        if line.is_empty() {
            continue;
        }
        let (file, _rest) = line
            .split_once(':')
            .expect("rg-shaped output is file:line:text");
        files.insert(file.to_string());
    }
    files
}
/// Every golden search case must yield exactly its expected file set, in both
/// JSON mode and text mode.
#[test]
fn search_golden_cases_return_the_expected_files() {
    for case in search_golden() {
        let SearchCase {
            query,
            args,
            matches,
        } = &case;
        let mut expected: BTreeSet<String> = BTreeSet::new();
        expected.extend(matches.iter().cloned());

        // JSON mode first, then text mode, each checked right after it runs.
        let from_json = search_files_json(&case);
        assert_eq!(
            from_json, expected,
            "search --json for {:?} {:?} must return the golden file set",
            query, args
        );

        let from_text = search_files_text(&case);
        assert_eq!(
            from_text, expected,
            "search (text mode) for {:?} {:?} must match the golden file set",
            query, args
        );
    }
}
/// A search with no hits must still exit successfully and print nothing.
#[test]
fn search_no_match_is_empty_success() {
    let run = dbmd()
        .args(["search", "zzz-no-such-term-anywhere-zzz", "--dir"])
        .arg(corpus_a())
        .assert()
        .success();
    let printed = &run.get_output().stdout;
    assert!(printed.is_empty(), "a no-match search prints nothing");
}
/// `dbmd fm set` on a copied store must change the summary, leave the record
/// body untouched, and propagate the new summary into the folder's
/// `index.jsonl`.
#[test]
fn fm_set_summary_round_trip_preserves_body_and_applies_change() {
    let (_guard, store) = copy_store_to_temp(&corpus_a());
    let rel = "records/contacts/marcus-okafor.md";
    let record_path = store.join(rel);

    // Reads the record's current summary through the CLI, trimmed.
    let read_summary = || -> String {
        let run = dbmd()
            .current_dir(&store)
            .args(["fm", "get", rel, "summary"])
            .assert()
            .success();
        let printed = String::from_utf8(run.get_output().stdout.clone()).unwrap();
        printed.trim().to_string()
    };

    let text_before = std::fs::read_to_string(&record_path).unwrap();
    let (_fm_before, body_before) =
        split_frontmatter_body(&text_before).expect("the record opens with frontmatter");
    let old_summary = read_summary();

    let new_summary = "Hand-curated: ops analyst who joined the Northstar renewal thread";
    assert_ne!(old_summary, new_summary, "the change must be observable");

    let assignment = format!("summary={new_summary}");
    dbmd()
        .current_dir(&store)
        .args(["fm", "set", rel, assignment.as_str()])
        .assert()
        .success();

    // The body must survive the edit unchanged.
    let text_after = std::fs::read_to_string(&record_path).unwrap();
    let (_fm_after, body_after) =
        split_frontmatter_body(&text_after).expect("the record still has frontmatter");
    assert_eq!(
        body_after, body_before,
        "the operator-edited body must round-trip byte-for-byte across `fm set`"
    );

    // The frontmatter must now carry the new value.
    let reparsed_summary = read_summary();
    assert_eq!(
        reparsed_summary, new_summary,
        "re-parsing after `fm set` returns the new summary"
    );

    // The folder's JSONL sidecar must carry it too.
    let sidecar = std::fs::read_to_string(store.join("records/contacts/index.jsonl")).unwrap();
    let marcus_line = sidecar
        .lines()
        .find(|line| line.contains("marcus-okafor"))
        .expect("marcus stays indexed");
    let rec: serde_json::Value = serde_json::from_str(marcus_line).unwrap();
    assert_eq!(
        rec["summary"],
        serde_json::json!(new_summary),
        "the sidecar carries the new summary verbatim after the round-trip"
    );
}
/// One file previewed by an index rebuild: where it would be written and what
/// it would contain.
struct Artifact {
    /// Path taken from the artifact's `--- <path> ---` header line.
    path: String,
    /// Everything between this header and the next one (or end of input),
    /// line endings included.
    content: String,
}

/// Splits dry-run output into its artifacts.
///
/// A line of the form `--- <path> ---` (trailing `\r`/`\n` ignored) opens a new
/// artifact; every following line, with its original line ending, is appended
/// to that artifact's content until the next header. Text before the first
/// header is discarded. A line that starts with `--- ` but does not end with
/// ` ---` is ordinary content.
fn parse_dry_run(stdout: &str) -> Vec<Artifact> {
    /// Returns the path named by a header line, or `None` for any other line.
    fn header_path(raw_line: &str) -> Option<&str> {
        raw_line
            .trim_end_matches(['\r', '\n'])
            .strip_prefix("--- ")?
            .strip_suffix(" ---")
    }

    let mut finished: Vec<Artifact> = Vec::new();
    // The artifact currently being filled, if a header has been seen yet.
    let mut open: Option<Artifact> = None;

    for chunk in stdout.split_inclusive('\n') {
        match header_path(chunk) {
            Some(path) => {
                // A new header closes whatever artifact was in progress.
                finished.extend(open.take());
                open = Some(Artifact {
                    path: path.to_string(),
                    content: String::new(),
                });
            }
            None => {
                if let Some(artifact) = open.as_mut() {
                    artifact.content.push_str(chunk);
                }
            }
        }
    }

    finished.extend(open);
    finished
}
/// Returns, in order, the lines of `content` that begin with `- [[` at column
/// zero — the wikilink bullets of an index page.
fn entry_lines(content: &str) -> Vec<&str> {
    let mut bullets = Vec::new();
    for line in content.lines() {
        if line.starts_with("- [[") {
            bullets.push(line);
        }
    }
    bullets
}
/// Looks up `key` in the frontmatter of `content` and returns its trimmed value.
///
/// The first frontmatter line whose trimmed text starts with `<key>:` wins.
/// Returns `None` when `content` has no frontmatter or no such line.
fn fm_field<'a>(content: &'a str, key: &str) -> Option<&'a str> {
    let (frontmatter, _body) = split_frontmatter_body(content)?;
    let prefix = format!("{key}:");
    for line in frontmatter.lines() {
        if let Some(value) = line.trim().strip_prefix(prefix.as_str()) {
            return Some(value.trim());
        }
    }
    None
}
/// Counts the `.md` files under `folder`, descending into subdirectories and
/// leaving out any file named `index.md`.
///
/// Directories that cannot be read (including a missing `folder`) and entries
/// that fail to read contribute nothing to the count.
fn count_md_files(folder: &Path) -> usize {
    // Explicit worklist of directories still to be scanned.
    let mut pending = vec![folder.to_path_buf()];
    let mut total = 0;

    while let Some(dir) = pending.pop() {
        let Ok(listing) = std::fs::read_dir(&dir) else {
            continue;
        };
        for entry in listing.flatten() {
            let path = entry.path();
            if path.is_dir() {
                pending.push(path);
                continue;
            }
            let is_markdown = path.extension().and_then(|x| x.to_str()) == Some("md");
            let is_index = path.file_name().and_then(|x| x.to_str()) == Some("index.md");
            if is_markdown && !is_index {
                total += 1;
            }
        }
    }

    total
}
/// Audits a set of index artifacts against the files on disk under `store_root`
/// and returns how many type-folder `index.md` artifacts were audited.
///
/// Two passes are made over `artifacts`:
///
/// 1. Every artifact whose path ends with `index.jsonl` must have exactly one
///    non-blank line per counted `.md` file in its folder, and every line must
///    be JSON carrying the universal keys.
/// 2. Every `index.md` whose frontmatter `scope` is `type-folder` must respect
///    the `INDEX_CAP` rules (all files listed and no `## More` footer at or
///    under the cap; exactly `INDEX_CAP` entries plus a footer over it), must
///    have a JSONL twin among `artifacts`, and each entry with a known,
///    non-empty summary must contain that summary verbatim.
///
/// # Panics
///
/// Panics (via assertions) on the first rule that is violated.
fn audit_index_artifacts(artifacts: &[Artifact], store_root: &Path) -> usize {
    use std::collections::{BTreeMap, HashMap};

    /// Lines of `text` that are not blank after trimming.
    fn non_blank<'t>(text: &'t str) -> impl Iterator<Item = &'t str> + 't {
        text.lines().filter(|line| !line.trim().is_empty())
    }

    // Path -> content lookup, used to find each index.md's JSONL twin.
    let mut by_path: BTreeMap<&str, &str> = BTreeMap::new();
    for artifact in artifacts {
        by_path.insert(artifact.path.as_str(), artifact.content.as_str());
    }

    // Pass 1: JSONL completeness and record shape.
    for a in artifacts {
        if !a.path.ends_with("index.jsonl") {
            continue;
        }
        let folder = a
            .path
            .strip_suffix("/index.jsonl")
            .expect("jsonl path ends with /index.jsonl");
        let lines: Vec<&str> = non_blank(&a.content).collect();
        let n_files = count_md_files(&store_root.join(folder));
        assert_eq!(
            lines.len(),
            n_files,
            "jsonl completeness: {} has {} lines but the folder has {} .md files (the jsonl twin is uncapped + complete)",
            a.path,
            lines.len(),
            n_files
        );
        for l in &lines {
            let rec: serde_json::Value = match serde_json::from_str(l) {
                Ok(value) => value,
                Err(e) => panic!("{}: bad JSON line: {e}\n{l}", a.path),
            };
            for key in [
                "path", "type", "summary", "tags", "links", "created", "updated",
            ] {
                assert!(
                    rec.get(key).is_some(),
                    "jsonl record in {} is missing universal key `{key}`: {l}",
                    a.path
                );
            }
        }
    }

    // Pass 2: type-folder index.md cap rules and summary quoting.
    let mut type_folders_audited = 0;
    for a in artifacts {
        if !a.path.ends_with("index.md") {
            continue;
        }
        if fm_field(&a.content, "scope") != Some("type-folder") {
            continue;
        }
        type_folders_audited += 1;

        let folder = fm_field(&a.content, "folder").expect("type-folder index has a folder field");
        let n_files = count_md_files(&store_root.join(folder));
        let entries = entry_lines(&a.content);
        let has_more = a.content.contains("## More");

        if n_files > INDEX_CAP {
            assert_eq!(
                entries.len(),
                INDEX_CAP,
                "cap (over): {} lists {} entries; the browse view caps at {INDEX_CAP}",
                a.path,
                entries.len()
            );
            assert!(
                has_more,
                "cap (over): {} is over the cap ({n_files} > {INDEX_CAP}) but has no `## More` footer",
                a.path
            );
            let true_count = format!("This folder has {n_files} files");
            assert!(
                a.content.contains(true_count.as_str()),
                "the `## More` footer must state the true file count ({n_files}) in {}",
                a.path
            );
            assert!(
                a.content.contains("dbmd index query"),
                "the `## More` footer must point at `dbmd index query` for the complete catalog in {}",
                a.path
            );
        } else {
            assert_eq!(
                entries.len(),
                n_files,
                "cap (under): {} lists {} entries but the folder has {} files — all must be listed",
                a.path,
                entries.len(),
                n_files
            );
            assert!(
                !has_more,
                "cap (under): {} has a `## More` footer but is under the {INDEX_CAP} cap ({n_files} files)",
                a.path
            );
        }

        // Record path -> summary, taken from the JSONL twin.
        let twin_path = format!("{folder}/index.jsonl");
        let jsonl = by_path
            .get(twin_path.as_str())
            .expect("every type-folder index.md has a jsonl twin in the same dry-run");
        let mut summaries: HashMap<String, String> = HashMap::new();
        for line in non_blank(jsonl) {
            let rec: serde_json::Value = serde_json::from_str(line).unwrap();
            let path = rec["path"].as_str().unwrap().to_string();
            let summary = rec["summary"].as_str().unwrap_or("").to_string();
            summaries.insert(path, summary);
        }

        // Entries without a closing `]]`, without a JSONL record, or with an
        // empty summary are skipped rather than failed.
        let mut checked = 0;
        for entry in &entries {
            let Some((target, rest)) = entry
                .strip_prefix("- [[")
                .and_then(|after| after.split_once("]]"))
            else {
                continue;
            };
            let record_path = format!("{target}.md");
            let Some(summary) = summaries
                .get(record_path.as_str())
                .filter(|text| !text.is_empty())
            else {
                continue;
            };
            assert!(
                rest.contains(summary.as_str()),
                "summary-verbatim: entry for {target} in {} does not quote its summary {summary:?} (entry: {entry:?})",
                a.path
            );
            checked += 1;
        }
        assert!(
            checked > 0,
            "summary-verbatim: audited 0 entries for {} — the audit must check something",
            a.path
        );
    }

    type_folders_audited
}
/// `dbmd index rebuild --dry-run` over the canonical corpus must pass the index
/// audit, emit the golden artifact list in the golden order, and reproduce each
/// committed golden exactly.
#[test]
fn index_rebuild_dry_run_audits_clean_and_matches_committed_goldens() {
    let run = dbmd()
        .current_dir(corpus_a())
        .args(["index", "rebuild", "--dry-run"])
        .assert()
        .success();
    let preview = String::from_utf8(run.get_output().stdout.clone()).unwrap();
    let artifacts = parse_dry_run(&preview);
    assert!(
        !artifacts.is_empty(),
        "the dry-run previews at least one artifact"
    );

    let audited = audit_index_artifacts(&artifacts, &corpus_a());
    assert!(
        audited >= 5,
        "audited {audited} type-folders — the corpus has several"
    );

    // The manifest lists one artifact path per non-empty line.
    let manifest = std::fs::read_to_string(corpus_a_expected("index/ARTIFACTS.txt"))
        .expect("EXPECTED/index/ARTIFACTS.txt is committed");
    let mut expected_order: Vec<&str> = Vec::new();
    for line in manifest.lines() {
        if !line.is_empty() {
            expected_order.push(line);
        }
    }
    let mut got_order: Vec<&str> = Vec::with_capacity(artifacts.len());
    for artifact in &artifacts {
        got_order.push(artifact.path.as_str());
    }
    assert_eq!(
        got_order, expected_order,
        "the dry-run must emit exactly the golden artifact set, in the golden order"
    );

    // Each artifact is compared with the golden stored under the same relative path.
    for Artifact { path, content } in &artifacts {
        let golden_path = corpus_a_expected(&format!("index/{}", path));
        let golden = match std::fs::read_to_string(&golden_path) {
            Ok(text) => text,
            Err(_) => panic!("missing committed golden: EXPECTED/index/{}", path),
        };
        assert_eq!(
            *content, golden,
            "dry-run output for {} must be byte-identical to its audited golden EXPECTED/index/{}",
            path, path
        );
    }
}
/// Builds a synthetic store with one note more than `INDEX_CAP`, rebuilds that
/// folder's index for real, and checks the over-cap behaviour: a capped
/// `index.md` with a `## More` footer next to an uncapped `index.jsonl`.
#[test]
fn index_rebuild_audits_the_over_cap_more_branch() {
    let tmp = tempfile::TempDir::new().unwrap();
    let store = tmp.path();
    write_db_md(store);

    let total = INDEX_CAP + 1;
    for i in 0..total {
        // Cycle through days 1..=28 and months 1..=12 so every timestamp is a real date.
        let (cycle, offset) = (i / 28, i % 28);
        let day = offset + 1;
        let month = cycle % 12 + 1;
        let ts = format!("2026-{month:02}-{day:02}T00:00:00Z");
        let note_rel = format!("records/notes/n{i:04}.md");
        let note_text = format!(
            "---\ntype: note\ncreated: {ts}\nupdated: {ts}\nsummary: note number {i}\n---\n\n# Note {i}\n"
        );
        write_file(store, &note_rel, &note_text);
    }

    dbmd()
        .current_dir(store)
        .args(["index", "rebuild", "--folder", "records/notes"])
        .assert()
        .success();

    let md_rel = "records/notes/index.md";
    let jsonl_rel = "records/notes/index.jsonl";
    let index_md = std::fs::read_to_string(store.join(md_rel)).unwrap();
    let index_jsonl = std::fs::read_to_string(store.join(jsonl_rel)).unwrap();

    // Feed the files that were actually written through the shared audit.
    let artifacts: Vec<Artifact> = [
        (md_rel, index_md.as_str()),
        (jsonl_rel, index_jsonl.as_str()),
    ]
    .iter()
    .map(|&(path, content)| Artifact {
        path: path.to_owned(),
        content: content.to_owned(),
    })
    .collect();
    let audited = audit_index_artifacts(&artifacts, store);
    assert_eq!(
        audited, 1,
        "exactly the one synthetic type-folder is audited"
    );

    // Direct checks, independent of the audit helper.
    let listed = entry_lines(&index_md).len();
    assert_eq!(
        listed, INDEX_CAP,
        "index.md caps the browse view at {INDEX_CAP} entries"
    );
    assert!(
        index_md.contains("## More"),
        "over-cap index.md carries the ## More footer"
    );
    let true_count = format!("This folder has {total} files");
    assert!(
        index_md.contains(true_count.as_str()),
        "the footer states the true (uncapped) file count"
    );
    let jsonl_records = index_jsonl
        .lines()
        .filter(|line| !line.trim().is_empty())
        .count();
    assert_eq!(
        jsonl_records, total,
        "index.jsonl is the uncapped, complete twin ({total} records)"
    );
}