use std::collections::BTreeSet;
use std::path::{Path, PathBuf};
use dbmd_core::graph::{backlinks, backlinks_filtered, forwardlinks};
use dbmd_core::parser::{parse_db_md, Shape};
use dbmd_core::store::{Layer, Store};
/// Locates the shared `tests/corpora` fixture directory.
///
/// The path starts at the manifest directory baked in at compile time via
/// `CARGO_MANIFEST_DIR`, climbs two levels, and descends into
/// `tests/corpora`. The `..` components are left in place (no
/// canonicalisation is performed).
fn corpora_dir() -> PathBuf {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    // `extend` pushes each component in order, exactly like chained `join`s.
    dir.extend(["..", "..", "tests", "corpora"]);
    dir
}
/// Opens the `corpus-a-canonical` fixture directory as a [`Store`].
///
/// # Panics
///
/// Panics if `Store::open` returns an error for that directory.
fn open_corpus_a() -> Store {
    Store::open(&corpora_dir().join("corpus-a-canonical"))
        .expect("corpus-a-canonical is a db.md store (has DB.md)")
}
/// Opens the `corpus-b-edges` fixture directory as a [`Store`].
///
/// # Panics
///
/// Panics if `Store::open` returns an error for that directory.
fn open_corpus_b() -> Store {
    Store::open(&corpora_dir().join("corpus-b-edges"))
        .expect("corpus-b-edges is a db.md store (has DB.md)")
}
/// Renders `paths` as strings suitable for order-independent comparison.
///
/// Each path is converted with `to_string_lossy`, every backslash is
/// rewritten to a forward slash, and the resulting strings are returned in
/// ascending order.
fn as_sorted_strings(paths: &[PathBuf]) -> Vec<String> {
    let mut rendered = Vec::with_capacity(paths.len());
    for path in paths {
        let text = path.to_string_lossy();
        rendered.push(text.replace('\\', "/"));
    }
    rendered.sort();
    rendered
}
/// Copies a slice of string slices into a vector of owned `String`s,
/// preserving order.
fn owned(strs: &[&str]) -> Vec<String> {
    let mut out = Vec::with_capacity(strs.len());
    for &s in strs {
        out.push(String::from(s));
    }
    out
}
/// Walking the sources layer of corpus-a must yield exactly the six listed
/// source files (compared as sorted, slash-normalised strings).
#[test]
fn corpus_a_walk_sources_layer_is_the_six_known_files() {
    let store = open_corpus_a();
    let walked = store.walk_layer(Layer::Sources).expect("walk sources");
    let actual = as_sorted_strings(&walked);

    let expected = owned(&[
        "sources/docs/2026-03-15-northstar-msa.md",
        "sources/docs/2026-04-30-aws-invoice.md",
        "sources/emails/2026/04/2026-04-03-figma-renewal-notice.md",
        "sources/emails/2026/04/2026-04-28-aws-invoice-available.md",
        "sources/emails/2026/05/2026-05-12-marcus-intro.md",
        "sources/emails/2026/05/2026-05-22-elena-renewal.md",
    ]);

    assert_eq!(
        actual, expected,
        "sources walk must recurse date-shards and exclude index.md/index.jsonl"
    );
}
/// Walking the wiki layer of corpus-a must yield exactly the four listed
/// pages (compared as sorted, slash-normalised strings).
#[test]
fn corpus_a_walk_wiki_layer_is_the_four_known_pages() {
    let store = open_corpus_a();
    let walked = store.walk_layer(Layer::Wiki).expect("walk wiki");
    let actual = as_sorted_strings(&walked);

    let expected = owned(&[
        "wiki/people/elena-rodriguez.md",
        "wiki/people/sarah-chen.md",
        "wiki/projects/northstar-renewal.md",
        "wiki/synthesis/2026-renewal-plan.md",
    ]);

    assert_eq!(
        actual, expected,
        "wiki walk must list every topic-folder page and nothing else"
    );
}
/// Checks that `Store::walk` on corpus-a yields content files only.
///
/// Asserts the overall count (515), the per-layer counts (6 sources, 505
/// records, 4 wiki), that a list of meta / sidecar / log paths is absent
/// from the walk, and that one representative content file per layer is
/// present.
#[test]
fn corpus_a_walk_is_content_only_no_meta_no_sidecar_no_log() {
    let store = open_corpus_a();
    let all = store.walk().expect("walk corpus-a");
    let set: BTreeSet<String> = as_sorted_strings(&all).into_iter().collect();

    assert_eq!(
        all.len(),
        515,
        "expected 6 sources + 505 records + 4 wiki content files"
    );

    // Per-layer breakdown of the total above. Each check carries its own
    // `expect` text and failure message so a mismatch names the layer.
    assert_eq!(
        store
            .walk_layer(Layer::Sources)
            .expect("walk sources")
            .len(),
        6,
        "sources layer must hold exactly 6 content files"
    );
    assert_eq!(
        store
            .walk_layer(Layer::Records)
            .expect("walk records")
            .len(),
        505,
        "records layer must hold exactly 505 content files"
    );
    assert_eq!(
        store.walk_layer(Layer::Wiki).expect("walk wiki").len(),
        4,
        "wiki layer must hold exactly 4 content files"
    );

    for excluded in [
        "DB.md",
        "index.md",
        "log.md",
        "records/contacts/index.md",
        "records/contacts/index.jsonl",
        "sources/emails/index.md",
        "sources/emails/index.jsonl",
        "wiki/people/index.md",
    ] {
        assert!(
            !set.contains(excluded),
            "Store::walk must not yield the meta/sidecar file {excluded}"
        );
    }

    for included in [
        "sources/emails/2026/05/2026-05-22-elena-renewal.md",
        "records/contacts/sarah-chen.md",
        "wiki/projects/northstar-renewal.md",
    ] {
        assert!(
            set.contains(included),
            "Store::walk must yield the content file {included}"
        );
    }
}
/// `Store::find_links_to` for the sarah-chen contact must return the listed
/// files under `sources/`, `records/` and `wiki/`, including the
/// `records/contacts/index.md` catalog, and must never return a `.jsonl`
/// path.
#[test]
fn corpus_a_find_links_to_sarah_chen_includes_catalog() {
    // Only hits under one of these prefixes are compared against the
    // expected list; anything else returned by the scan is ignored there.
    const LAYER_PREFIXES: [&str; 3] = ["sources/", "records/", "wiki/"];

    let store = open_corpus_a();
    let hits = store
        .find_links_to(Path::new("records/contacts/sarah-chen"))
        .expect("find_links_to sarah-chen");
    let hit_set: BTreeSet<String> = as_sorted_strings(&hits).into_iter().collect();

    let layered: Vec<String> = hit_set
        .iter()
        .filter(|hit| LAYER_PREFIXES.iter().any(|prefix| hit.starts_with(*prefix)))
        .cloned()
        .collect();
    let expected = owned(&[
        "records/companies/northstar.md",
        "records/contacts/index.md",
        "records/meetings/2026/04/2026-04-15-northstar-quarterly-review.md",
        "records/meetings/2026/05/2026-05-22-northstar-renewal-call.md",
        "wiki/people/sarah-chen.md",
        "wiki/projects/northstar-renewal.md",
    ]);
    assert_eq!(
        layered, expected,
        "find_links_to scans every .md (catalogs included) for the literal link"
    );

    let catalog_present = hit_set.contains("records/contacts/index.md");
    assert!(
        catalog_present,
        "the index.md catalog carries the link and must be returned"
    );

    let no_jsonl = hit_set.iter().all(|hit| !hit.ends_with(".jsonl"));
    assert!(
        no_jsonl,
        "find_links_to is a .md scan; an index.jsonl is never a hit"
    );
}
/// A link whose target file does not exist must still be reported by
/// `Store::find_links_to`: the only referrer of the missing `ghost` contact
/// in corpus-b is `records/misc/broken-link.md`.
#[test]
fn corpus_b_find_links_to_dangling_target_finds_the_one_referrer() {
    let store = open_corpus_b();

    // Guard the fixture itself: the target must really be absent on disk.
    let ghost_file = store.root.join("records/contacts/ghost.md");
    assert!(
        !ghost_file.exists(),
        "fixture invariant: records/contacts/ghost.md must not exist"
    );

    let referrers = store
        .find_links_to(Path::new("records/contacts/ghost"))
        .expect("find_links_to ghost");
    assert_eq!(
        as_sorted_strings(&referrers),
        owned(&["records/misc/broken-link.md"]),
        "a broken link's target still has exactly its one referrer"
    );
}
/// `backlinks` for the sarah-chen contact must return the five listed
/// linkers as extension-less paths, with no `index.md` catalog among them.
#[test]
fn corpus_a_backlinks_sarah_chen_is_content_only_bare_form() {
    let store = open_corpus_a();
    let target = Path::new("records/contacts/sarah-chen");
    let linkers = backlinks(&store, target).expect("backlinks sarah-chen");

    let expected = owned(&[
        "records/companies/northstar",
        "records/meetings/2026/04/2026-04-15-northstar-quarterly-review",
        "records/meetings/2026/05/2026-05-22-northstar-renewal-call",
        "wiki/people/sarah-chen",
        "wiki/projects/northstar-renewal",
    ]);
    assert_eq!(
        as_sorted_strings(&linkers),
        expected,
        "backlinks: content files only (no index.md catalog), bare no-.md paths"
    );
}
/// `forwardlinks` for the sarah-chen contact must return exactly the three
/// listed targets, each once.
#[test]
fn corpus_a_forwardlinks_sarah_chen_spans_frontmatter_and_body_deduped() {
    let store = open_corpus_a();
    let seed = Path::new("records/contacts/sarah-chen.md");
    let targets = forwardlinks(&store, seed).expect("forwardlinks sarah-chen");

    let expected = owned(&[
        "records/companies/northstar",
        "records/meetings/2026/05/2026-05-22-northstar-renewal-call",
        "wiki/projects/northstar-renewal",
    ]);
    assert_eq!(
        as_sorted_strings(&targets),
        expected,
        "forwardlinks must include the frontmatter `company` link, deduped"
    );
}
/// `forwardlinks` must accept the seed with or without the `.md` suffix and
/// return the same single edge (`records/companies/northstar`) either way.
#[test]
fn corpus_a_forwardlinks_accepts_bare_seed_and_dedups_single_edge() {
    let store = open_corpus_a();

    let bare = forwardlinks(&store, Path::new("records/contacts/marcus-okafor"))
        .expect("forwardlinks bare seed");
    let dotted = forwardlinks(&store, Path::new("records/contacts/marcus-okafor.md"))
        .expect("forwardlinks dotted seed");

    // Normalise once; `assert_eq!` only borrows its operands, so the same
    // vector serves both comparisons.
    let bare_edges = as_sorted_strings(&bare);
    assert_eq!(
        bare_edges,
        owned(&["records/companies/northstar"]),
        "marcus-okafor links only to its company (deduped from two mentions)"
    );
    assert_eq!(
        bare_edges,
        as_sorted_strings(&dotted),
        "bare and .md seed spellings resolve to the same file and edge set"
    );
}
/// `forwardlinks` for the internal-renewal-sync meeting must return exactly
/// one edge, to `records/contacts/david-kim`.
#[test]
fn corpus_a_forwardlinks_extracts_a_frontmatter_only_edge() {
    let store = open_corpus_a();
    let seed = Path::new("records/meetings/2026/05/2026-05-12-internal-renewal-sync.md");
    let targets = forwardlinks(&store, seed).expect("forwardlinks internal-renewal-sync");

    assert_eq!(
        as_sorted_strings(&targets),
        owned(&["records/contacts/david-kim"]),
        "the only edge (a frontmatter `attendees:` link) must be extracted"
    );
}
/// Backlinks of the david-kim contact, unscoped and scoped to type
/// `meeting`.
///
/// The unscoped call must return the four listed linkers; the
/// `backlinks_filtered` call restricted to `meeting` must return the three
/// meetings among them.
#[test]
fn corpus_a_backlinks_david_kim_includes_frontmatter_only_edges() {
    let store = open_corpus_a();
    let target = Path::new("records/contacts/david-kim");

    let all_linkers = backlinks(&store, target).expect("backlinks david-kim");
    let unscoped = as_sorted_strings(&all_linkers);
    assert_eq!(
        unscoped,
        owned(&[
            "records/companies/acme",
            "records/meetings/2026/04/2026-04-15-northstar-quarterly-review",
            "records/meetings/2026/05/2026-05-12-internal-renewal-sync",
            "records/meetings/2026/05/2026-05-22-northstar-renewal-call",
        ]),
        "unscoped backlinks (ripgrep scan) must find every literal `[[…david-kim]]`, \
         frontmatter or body"
    );

    let type_filter = ["meeting".to_string()];
    let meeting_linkers = backlinks_filtered(&store, target, &type_filter, None)
        .expect("scoped backlinks david-kim --type meeting");
    let scoped = as_sorted_strings(&meeting_linkers);
    assert_eq!(
        scoped,
        owned(&[
            "records/meetings/2026/04/2026-04-15-northstar-quarterly-review",
            "records/meetings/2026/05/2026-05-12-internal-renewal-sync",
            "records/meetings/2026/05/2026-05-22-northstar-renewal-call",
        ]),
        "scoped backlinks confirms candidates by parse; the two attendees-only \
         meetings must survive a frontmatter-aware confirm-read"
    );
}
/// Cross-checks the two link directions on one node: every path that
/// `backlinks` reports for the renewal-call meeting must list that meeting
/// among its own `forwardlinks`. The backlink set must also be non-empty.
#[test]
fn corpus_a_backlinks_and_forwardlinks_agree_on_one_edge_set() {
    let store = open_corpus_a();
    let meeting = "records/meetings/2026/05/2026-05-22-northstar-renewal-call";

    let incoming = backlinks(&store, Path::new(meeting)).expect("backlinks meeting");
    assert!(
        !incoming.is_empty(),
        "the renewal-call meeting is referenced by several records — \
         a non-empty backlink set is part of the fixture's intent"
    );

    for linker in &incoming {
        let outgoing = forwardlinks(&store, linker).expect("forwardlinks of a backlinker");
        let points_back = as_sorted_strings(&outgoing)
            .iter()
            .any(|edge| edge.as_str() == meeting);
        assert!(
            points_back,
            "{} is reported as a backlink of {meeting} but its forwardlinks \
             do not contain it — the two directions disagree",
            linker.display()
        );
    }
}
/// Checks the configuration that corpus-a's `DB.md` parses into.
///
/// Covers, in order: equality between the store's config and a direct
/// `parse_db_md` of the same file; the agent-instructions text; the frozen
/// pages and ignored types lists; the set of schema type names; and selected
/// field attributes (required flag, shape, link prefix, enum values,
/// default) on the `contact`, `company`, `expense` and `invoice` schemas.
#[test]
fn corpus_a_db_md_schemas_and_policies_parse_per_spec() {
    let store = open_corpus_a();
    let cfg = &store.config;

    // The config held by the store must equal a fresh parse of DB.md.
    let db_md = store.root.join("DB.md");
    let raw = std::fs::read_to_string(&db_md).expect("read corpus-a DB.md");
    let parsed = parse_db_md(&raw, &db_md).expect("parse corpus-a DB.md");
    assert_eq!(
        &parsed, cfg,
        "Store::open must surface exactly what parse_db_md produces"
    );

    // Top-level policies.
    let instructions = cfg
        .agent_instructions
        .as_deref()
        .expect("corpus-a DB.md declares ## Agent instructions");
    assert!(
        instructions.contains("British English"),
        "agent instructions prose must be captured verbatim, got: {instructions:?}"
    );
    assert_eq!(
        cfg.frozen_pages,
        vec![PathBuf::from("wiki/synthesis/2026-renewal-plan.md")],
        "the one frozen page must parse from its bullet"
    );
    assert_eq!(
        cfg.ignored_types,
        vec!["test".to_string()],
        "the one ignored type must parse from its bullet"
    );

    // Schema inventory.
    let schema_types: BTreeSet<&str> = cfg.schemas.keys().map(|s| s.as_str()).collect();
    assert_eq!(
        schema_types,
        BTreeSet::from(["contact", "company", "expense", "meeting", "invoice"]),
        "every ### <type> sub-section must become a schema"
    );

    // Looks up a field by name inside the named schema. Indexing panics if
    // the schema is absent (as the direct `cfg.schemas[..]` indexing did);
    // a missing field is reported by the caller's `expect`.
    let field = |schema: &str, name: &str| {
        cfg.schemas[schema].fields.iter().find(|f| f.name == name)
    };

    // contact
    let name = field("contact", "name").expect("contact.name");
    assert!(
        name.required && name.shape == Some(Shape::String),
        "contact.name is a required string"
    );

    let email = field("contact", "email").expect("contact.email");
    assert!(
        email.required && email.shape == Some(Shape::Email),
        "contact.email is a required email"
    );

    let company = field("contact", "company").expect("contact.company");
    assert!(company.required, "contact.company is required");
    assert_eq!(
        company.link_prefix.as_deref(),
        Some(Path::new("records/companies")),
        "`link to records/companies/` must parse with the trailing slash dropped"
    );
    assert!(
        company.shape.is_none(),
        "a link field carries no scalar shape"
    );

    let role = field("contact", "role").expect("contact.role");
    assert!(
        !role.required && role.shape == Some(Shape::String),
        "role is an optional string"
    );

    // company
    let relationship = field("company", "relationship").expect("company.relationship");
    assert_eq!(
        relationship.enum_values.as_deref(),
        Some(
            &[
                "customer".to_string(),
                "vendor".to_string(),
                "partner".to_string(),
                "prospect".to_string(),
            ][..]
        ),
        "enum: must parse the full comma-separated option list"
    );
    assert!(
        !relationship.required && relationship.shape.is_none(),
        "an enum-only field is optional and has no scalar shape"
    );

    // expense
    let currency = field("expense", "currency").expect("expense.currency");
    assert_eq!(
        currency.default,
        Some(serde_yml::Value::String("USD".to_string())),
        "`default USD` must parse into the field's default value"
    );

    // invoice
    let amount = field("invoice", "amount").expect("invoice.amount");
    assert!(
        amount.required && amount.shape == Some(Shape::Currency),
        "invoice.amount is a required currency"
    );
}
/// Compares the `meeting.attendees` field across the two corpora: required
/// in both, with no link prefix in corpus-a and the `records/contacts`
/// prefix in corpus-b.
#[test]
fn corpus_a_and_b_meeting_schemas_differ_on_attendees_link() {
    let corpus_a = open_corpus_a();
    let corpus_b = open_corpus_b();

    let in_a = corpus_a.config.schemas["meeting"]
        .fields
        .iter()
        .find(|field| field.name == "attendees")
        .expect("corpus-a meeting.attendees");
    let in_b = corpus_b.config.schemas["meeting"]
        .fields
        .iter()
        .find(|field| field.name == "attendees")
        .expect("corpus-b meeting.attendees");

    assert!(
        in_a.required && in_a.link_prefix.is_none(),
        "corpus-a meeting.attendees is a bare required field (no link prefix)"
    );

    assert!(in_b.required, "corpus-b meeting.attendees is required");
    assert_eq!(
        in_b.link_prefix.as_deref(),
        Some(Path::new("records/contacts")),
        "corpus-b declares `attendees (required, link to records/contacts/)`"
    );

    assert_ne!(
        in_a.link_prefix, in_b.link_prefix,
        "the two corpora's meeting.attendees must parse differently"
    );
}
/// Parses `corpus-b-edges/bad-db-md/DB.md` directly with `parse_db_md`.
///
/// The parse must succeed, the agent instructions must contain
/// "Recognized section", and the schemas, frozen pages and ignored types
/// must all be empty.
#[test]
fn corpus_b_bad_db_md_parses_known_sections_and_ignores_unknown() {
    let db_md = corpora_dir()
        .join("corpus-b-edges")
        .join("bad-db-md")
        .join("DB.md");
    let raw = std::fs::read_to_string(&db_md).expect("read bad-db-md DB.md");
    let cfg = parse_db_md(&raw, &db_md).expect("parse_db_md is lenient on unknown sections");

    let instructions_captured = matches!(
        cfg.agent_instructions.as_deref(),
        Some(text) if text.contains("Recognized section")
    );
    assert!(
        instructions_captured,
        "the recognized ## Agent instructions must still be captured"
    );

    let nothing_leaked =
        cfg.schemas.is_empty() && cfg.frozen_pages.is_empty() && cfg.ignored_types.is_empty();
    assert!(
        nothing_leaked,
        "an unknown H2 must not leak into schemas/policies"
    );
}