use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::path::{Path, PathBuf};
use chrono::{DateTime, FixedOffset, NaiveDateTime};
use serde_yml::Value;
use crate::parser::{FieldSpec, Schema, Shape};
use crate::store::Store;
/// How serious a reported [`Issue`] is.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Severity {
/// A violation; [`Issue::is_error`] returns `true` only for this variant.
Error,
/// A problem that is reported but is not counted as an error by [`Issue::is_error`].
Warning,
/// An informational notice (used in this file for files carrying an ignored type).
Info,
}
/// A single finding produced by the validators in this module.
///
/// NOTE(review): issues are built through the `push` / `not_a_store_issue` helpers, which are
/// defined outside this view; the field descriptions below follow how callers in this file
/// pass arguments to `push` and should be confirmed against that helper.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Issue {
/// Seriousness of the finding.
pub severity: Severity,
/// Identifier of the finding kind; callers pass one of the constants in [`codes`].
pub code: &'static str,
/// File the finding is reported against (callers pass store-relative paths).
pub file: PathBuf,
/// Line number within `file`, when one could be determined.
pub line: Option<u32>,
/// Frontmatter key the finding concerns, if any.
pub key: Option<String>,
/// Human-readable description of the problem.
pub message: String,
/// Optional hint on how to fix the problem.
pub suggestion: Option<String>,
/// Other files involved in the finding (e.g. the other members of a duplicate group).
pub related: Vec<PathBuf>,
}
impl Issue {
pub fn is_error(&self) -> bool {
matches!(self.severity, Severity::Error)
}
}
/// String identifiers used as [`Issue::code`].
///
/// Every constant's value is identical to its name.
pub mod codes {
// Store root marker and `DB.md` frontmatter.
pub const NOT_A_STORE: &str = "NOT_A_STORE";
pub const DB_MD_BAD_TYPE: &str = "DB_MD_BAD_TYPE";
pub const DB_MD_MISSING_FIELD: &str = "DB_MD_MISSING_FIELD";
pub const DB_MD_UNKNOWN_SECTION: &str = "DB_MD_UNKNOWN_SECTION";
// Frontmatter parsing, `type:` and timestamp fields.
pub const FM_MISSING_TYPE: &str = "FM_MISSING_TYPE";
pub const FM_MALFORMED_YAML: &str = "FM_MALFORMED_YAML";
pub const FM_BAD_TIMESTAMP: &str = "FM_BAD_TIMESTAMP";
pub const LAYER_TYPE_MISMATCH: &str = "LAYER_TYPE_MISMATCH";
// The `summary` frontmatter field.
pub const SUMMARY_MISSING: &str = "SUMMARY_MISSING";
pub const SUMMARY_EMPTY: &str = "SUMMARY_EMPTY";
pub const SUMMARY_MULTILINE: &str = "SUMMARY_MULTILINE";
pub const SUMMARY_TOO_LONG: &str = "SUMMARY_TOO_LONG";
// Wiki-links (`[[...]]`) in frontmatter and body.
pub const WIKI_LINK_SHORT_FORM: &str = "WIKI_LINK_SHORT_FORM";
pub const WIKI_LINK_BROKEN: &str = "WIKI_LINK_BROKEN";
pub const WIKI_LINK_AMBIGUOUS: &str = "WIKI_LINK_AMBIGUOUS";
pub const WIKI_LINK_HAS_EXTENSION: &str = "WIKI_LINK_HAS_EXTENSION";
pub const WIKI_LINK_FLOW_FORM_LIST: &str = "WIKI_LINK_FLOW_FORM_LIST";
// Duplicate detection: hard (`id`) and per-type soft dedup keys.
pub const DUP_ID: &str = "DUP_ID";
pub const DUP_CONTACT_EMAIL: &str = "DUP_CONTACT_EMAIL";
pub const DUP_COMPANY_DOMAIN: &str = "DUP_COMPANY_DOMAIN";
pub const DUP_EXPENSE_TUPLE: &str = "DUP_EXPENSE_TUPLE";
pub const DUP_INVOICE_TUPLE: &str = "DUP_INVOICE_TUPLE";
pub const DUP_EMAIL_REINGEST: &str = "DUP_EMAIL_REINGEST";
pub const DUP_MEETING_TUPLE: &str = "DUP_MEETING_TUPLE";
// Schema conformance of frontmatter fields.
pub const SCHEMA_MISSING_REQUIRED: &str = "SCHEMA_MISSING_REQUIRED";
pub const SCHEMA_SHAPE_MISMATCH: &str = "SCHEMA_SHAPE_MISMATCH";
pub const SCHEMA_LINK_PREFIX_MISMATCH: &str = "SCHEMA_LINK_PREFIX_MISMATCH";
pub const SCHEMA_ENUM_VIOLATION: &str = "SCHEMA_ENUM_VIOLATION";
// Store policies.
pub const POLICY_FROZEN_PAGE: &str = "POLICY_FROZEN_PAGE";
pub const POLICY_IGNORED_TYPE_PRESENT: &str = "POLICY_IGNORED_TYPE_PRESENT";
pub const POLICY_IGNORED_TYPE_DERIVED: &str = "POLICY_IGNORED_TYPE_DERIVED";
// `log.md` entry headers.
pub const LOG_BAD_TIMESTAMP: &str = "LOG_BAD_TIMESTAMP";
pub const LOG_UNKNOWN_KIND: &str = "LOG_UNKNOWN_KIND";
pub const LOG_OUT_OF_ORDER: &str = "LOG_OUT_OF_ORDER";
// `index.md` / `index.jsonl` files.
pub const INDEX_MISSING: &str = "INDEX_MISSING";
pub const INDEX_STALE_ENTRY: &str = "INDEX_STALE_ENTRY";
pub const INDEX_MISSING_ENTRY: &str = "INDEX_MISSING_ENTRY";
pub const INDEX_ORPHAN: &str = "INDEX_ORPHAN";
pub const INDEX_WRONG_SCOPE: &str = "INDEX_WRONG_SCOPE";
pub const INDEX_SUMMARY_MISMATCH: &str = "INDEX_SUMMARY_MISMATCH";
pub const INDEX_JSONL_MISSING: &str = "INDEX_JSONL_MISSING";
pub const INDEX_JSONL_DESYNC: &str = "INDEX_JSONL_DESYNC";
pub const INDEX_JSONL_STALE: &str = "INDEX_JSONL_STALE";
// The `tags` frontmatter field.
pub const TAGS_MALFORMED: &str = "TAGS_MALFORMED";
}
/// Maximum `summary` length, counted in `char`s; longer summaries get `SUMMARY_TOO_LONG`.
const MAX_SUMMARY_LEN: usize = 200;
/// Entry kinds accepted in `log.md` headers; any other kind is reported as `LOG_UNKNOWN_KIND`.
const RECOGNIZED_LOG_KINDS: &[&str] = &[
"ingest",
"create",
"update",
"delete",
"rename",
"link",
"validate",
"index-rebuild",
"contradiction",
];
/// Validates only the "working set": objects changed since a cutoff plus the files that link
/// to them.
///
/// The cutoff is `since` when given, otherwise whatever `last_validate_at` reports for the
/// store. If the store marker is absent, the single "not a store" issue is returned instead.
/// Paths in the working set that are no longer regular files are skipped. The returned issues
/// are sorted with `issue_order`.
///
/// # Errors
///
/// Propagates any error from `Store::find_links_to_any`.
pub fn validate_working_set(
    store: &Store,
    since: Option<DateTime<FixedOffset>>,
) -> crate::Result<Vec<Issue>> {
    if !store_marker_present(store) {
        return Ok(vec![not_a_store_issue(store)]);
    }

    // An explicit `since` wins; the fallback is only consulted when it is absent.
    let cutoff = since.or_else(|| last_validate_at(store));

    // Start from the changed objects, then widen the set with everything linking to them.
    let mut working: BTreeSet<PathBuf> = changed_objects_since(store, cutoff);
    let changed_targets: Vec<PathBuf> = working.iter().cloned().collect();
    working.extend(store.find_links_to_any(&changed_targets)?);

    let mut issues = Vec::new();
    for rel in &working {
        let abs = store.root.join(rel);
        if abs.is_file() {
            // No basename index here, so ambiguity detection for short links is off.
            check_content_file(store, rel, &abs, None, &mut issues);
        }
    }

    issues.sort_by(issue_order);
    Ok(issues)
}
/// Runs every validator over the whole store.
///
/// If the store marker is absent, the single "not a store" issue is returned. Otherwise the
/// checks run in this order: `DB.md`, each content file (with a basename index so ambiguous
/// short links can be detected), duplicates, indexes, and the log. The returned issues are
/// sorted with `issue_order`.
pub fn validate_all(store: &Store) -> crate::Result<Vec<Issue>> {
    if !store_marker_present(store) {
        return Ok(vec![not_a_store_issue(store)]);
    }

    let mut issues = Vec::new();
    check_db_md(store, &mut issues);

    let files = walk_content_files(&store.root);
    let basenames = build_basename_index(&files);

    // Keep only the files that produced a parse result; those feed duplicate detection.
    let parsed: Vec<(PathBuf, Parsed)> = files
        .iter()
        .filter_map(|rel| {
            let abs = store.root.join(rel);
            check_content_file(store, rel, &abs, Some(&basenames), &mut issues)
                .map(|p| (rel.clone(), p))
        })
        .collect();

    check_duplicates(&parsed, &mut issues);
    check_indexes(store, &files, &mut issues);
    check_log(store, &mut issues);

    issues.sort_by(issue_order);
    Ok(issues)
}
/// Result of splitting and parsing one file's frontmatter.
struct Parsed {
/// Frontmatter as a key → value map; `None` when the block was not a valid YAML mapping.
fm: Option<BTreeMap<String, Value>>,
/// Raw frontmatter YAML text, kept for line lookups and link extraction.
fm_yaml: String,
}
/// Validates a single file: frontmatter presence, YAML well-formedness, frontmatter fields,
/// and wiki-links in the body.
///
/// * `rel` – path used when reporting issues; `abs` – path actually read from disk.
/// * `basenames` – optional stem index, forwarded to the link checks.
///
/// Returns `None` when the file cannot be read or has no frontmatter block. Otherwise returns
/// `Some(Parsed)`, whose `fm` is `None` if the frontmatter was not a valid YAML mapping.
fn check_content_file(
store: &Store,
rel: &Path,
abs: &Path,
basenames: Option<&BasenameIndex>,
issues: &mut Vec<Issue>,
) -> Option<Parsed> {
// Unreadable files are skipped silently (no issue is recorded).
let text = match std::fs::read_to_string(abs) {
Ok(t) => t,
Err(_) => return None,
};
let is_content = is_content_file(rel);
let (fm_yaml, body, fm_end_line) = match split_frontmatter(&text) {
Some(split) => split,
None => {
// No frontmatter at all: for content files that implies both `type` and `summary`
// are missing, so report both; other files are left alone.
if is_content {
push(
issues,
Severity::Error,
codes::FM_MISSING_TYPE,
rel,
None,
Some("type".into()),
"content file has no frontmatter `type:`".into(),
Some("add a YAML frontmatter block with `type:`".into()),
vec![],
);
push(
issues,
Severity::Error,
codes::SUMMARY_MISSING,
rel,
None,
Some("summary".into()),
"content file has no `summary`".into(),
Some("run `dbmd fm init`".into()),
vec![],
);
}
return None;
}
};
// A mapping becomes the field map; a null document counts as an empty mapping; any other
// YAML value or a parse error is reported at line 1 and leaves `fm` as `None`.
let fm: Option<BTreeMap<String, Value>> = match serde_yml::from_str::<Value>(&fm_yaml) {
Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
Ok(Value::Null) => Some(BTreeMap::new()),
Ok(_) => {
push(
issues,
Severity::Error,
codes::FM_MALFORMED_YAML,
rel,
Some(1),
None,
"frontmatter is not a YAML mapping".into(),
None,
vec![],
);
None
}
Err(e) => {
push(
issues,
Severity::Error,
codes::FM_MALFORMED_YAML,
rel,
Some(1),
None,
format!("frontmatter block isn't valid YAML: {e}"),
None,
vec![],
);
None
}
};
// Field-level checks need a parsed map; body links are checked regardless.
if let Some(map) = &fm {
check_frontmatter(store, rel, map, &fm_yaml, basenames, issues, is_content);
}
check_body_wiki_links(store, rel, &body, fm_end_line, basenames, issues);
Some(Parsed { fm, fm_yaml })
}
/// Runs all frontmatter-level checks for one file.
///
/// * `fm` – parsed frontmatter map; `fm_yaml` – the raw YAML it came from (used for line
///   lookups and for text-level link extraction).
/// * `basenames` – optional stem index forwarded to `check_wiki_link`.
/// * `is_content` – when `false`, the `type`, layer and `summary` checks are skipped.
///
/// Checks, in order: `type` presence, type/layer agreement, `summary`, `created`/`updated`
/// timestamps, per-type canonical date fields, `tags` shape, flow-form link lists,
/// frontmatter wiki-links, ignored-type policies, and finally schema conformance.
fn check_frontmatter(
store: &Store,
rel: &Path,
fm: &BTreeMap<String, Value>,
fm_yaml: &str,
basenames: Option<&BasenameIndex>,
issues: &mut Vec<Issue>,
is_content: bool,
) {
let type_ = fm.get("type").and_then(scalar_string);
if is_content && type_.is_none() {
push(
issues,
Severity::Error,
codes::FM_MISSING_TYPE,
rel,
fm_key_line_or_top(fm_yaml, "type"),
Some("type".into()),
"content file has no `type:`".into(),
Some("add a `type:` field (e.g. `type: contact`)".into()),
vec![],
);
}
// Layer check: only when both the type's canonical layer and the file's layer are known.
if is_content {
if let Some(t) = &type_ {
if let (Some(expected), Some(actual)) = (canonical_layer_for_type(t), layer_of(rel)) {
if expected != actual {
push(
issues,
Severity::Warning,
codes::LAYER_TYPE_MISMATCH,
rel,
fm_key_line(fm_yaml, "type"),
Some("type".into()),
format!(
"type `{t}` belongs in `{expected}/` but this file is under `{actual}/`"
),
Some(format!(
"move the file under `{expected}/` (its canonical layer), or change its `type:`"
)),
vec![],
);
}
}
}
}
if is_content {
check_summary(rel, fm, fm_yaml, issues);
}
// `created` / `updated` are validated with `is_iso8601`; non-scalar values are not checked.
for key in ["created", "updated"] {
if let Some(v) = fm.get(key) {
if let Some(s) = scalar_string(v) {
if !is_iso8601(&s) {
push(
issues,
Severity::Error,
codes::FM_BAD_TIMESTAMP,
rel,
fm_key_line(fm_yaml, key),
Some(key.into()),
format!("`{key}` is not ISO-8601: {s:?}"),
Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
vec![],
);
}
}
}
}
// Canonical date fields of the type accept a date or a datetime. Fields the effective
// schema already declares with `Shape::Date` are skipped here; `check_schema` below
// validates those through `check_schema_shape`.
if let Some(t) = &type_ {
let schema_date_fields = schema_shaped_date_fields(store, t);
for key in canonical_date_fields(t) {
if schema_date_fields.contains(*key) {
continue; }
if let Some(v) = fm.get(*key) {
if let Some(s) = scalar_string(v) {
if !is_iso8601_date_or_datetime(&s) {
push(
issues,
Severity::Error,
codes::FM_BAD_TIMESTAMP,
rel,
fm_key_line(fm_yaml, key),
Some((*key).into()),
format!("`{key}` is not an ISO-8601 date: {s:?}"),
Some("use an ISO-8601 date, e.g. 2026-05-27".into()),
vec![],
);
}
}
}
}
}
if let Some(tags) = fm.get("tags") {
if !is_flat_scalar_list(tags) {
push(
issues,
Severity::Warning,
codes::TAGS_MALFORMED,
rel,
fm_key_line(fm_yaml, "tags"),
Some("tags".into()),
"`tags` must be a flat YAML list of short scalar labels".into(),
Some("use block form: one `- <tag>` per line".into()),
vec![],
);
}
}
// Link lists written in inline flow form are detected on the raw YAML text.
for key in detect_flow_form_link_lists(fm_yaml) {
push(
issues,
Severity::Error,
codes::WIKI_LINK_FLOW_FORM_LIST,
rel,
fm_key_line(fm_yaml, &key),
Some(key.clone()),
format!("`{key}` uses inline flow form `[[[a]], [[b]]]`"),
Some("use YAML block-sequence form: one `- [[...]]` per line".into()),
vec![],
);
}
// Keys whose schema spec carries a link prefix are validated by `check_schema_link`
// (via `check_schema`), so they are excluded from the generic link check below.
// With no `type`, the lookup uses the empty string as the type name.
let schema_link_keys: BTreeSet<String> =
effective_schema(store, type_.as_deref().unwrap_or(""))
.map(|s| {
s.fields
.iter()
.filter(|f| f.link_prefix.is_some())
.map(|f| f.name.clone())
.collect()
})
.unwrap_or_default();
// NOTE(review): the literal `2` is presumably the file line on which the frontmatter
// YAML starts (line 1 being the opening `---`) — confirm against the helper.
for (key, link) in frontmatter_link_fields_text(fm_yaml, 2) {
if schema_link_keys.contains(&key) {
continue;
}
check_wiki_link(
store,
rel,
&link,
Some(link.line),
Some(&key),
basenames,
issues,
);
}
// Policy checks: the file's own type is ignored, or it derives from an ignored-type record.
if let Some(t) = &type_ {
if store.config.ignored_types.iter().any(|it| it == t) {
push(
issues,
Severity::Info,
codes::POLICY_IGNORED_TYPE_PRESENT,
rel,
fm_key_line(fm_yaml, "type"),
Some("type".into()),
format!("file has ignored type `{t}` (per DB.md ## Policies)"),
None,
vec![PathBuf::from("DB.md")],
);
}
// Each `derived_from` link is tested individually so the issue lands on that link's line.
for link in frontmatter_links_for_key(fm_yaml, "derived_from", 2) {
if let Some(hit) =
derived_from_ignored_type(store, t, std::iter::once(link.target.as_str()))
{
push(
issues,
Severity::Warning,
codes::POLICY_IGNORED_TYPE_DERIVED,
rel,
Some(link.line),
Some("derived_from".into()),
format!(
"wiki-page derives from ignored-type record `{}` (type `{}`)",
hit.target, hit.target_type
),
None,
vec![
PathBuf::from(format!("{}.md", hit.target)),
PathBuf::from("DB.md"),
],
);
}
}
}
// Schema conformance runs last, against the configured or implicit schema for the type.
if let Some(t) = &type_ {
if let Some(schema) = effective_schema(store, t) {
check_schema(store, rel, fm, fm_yaml, &schema, issues);
}
}
}
fn check_summary(rel: &Path, fm: &BTreeMap<String, Value>, fm_yaml: &str, issues: &mut Vec<Issue>) {
let line = fm_key_line(fm_yaml, "summary");
match fm.get("summary") {
None => push(
issues,
Severity::Error,
codes::SUMMARY_MISSING,
rel,
fm_key_line_or_top(fm_yaml, "summary"),
Some("summary".into()),
"content file has no `summary`".into(),
Some("run `dbmd fm init`".into()),
vec![],
),
Some(v) => {
let s = scalar_string(v).unwrap_or_default();
if s.trim().is_empty() {
push(
issues,
Severity::Error,
codes::SUMMARY_EMPTY,
rel,
line,
Some("summary".into()),
"`summary` is present but empty".into(),
Some("write a one-line summary, or run `dbmd fm init`".into()),
vec![],
);
} else if s.contains('\n') {
push(
issues,
Severity::Error,
codes::SUMMARY_MULTILINE,
rel,
line,
Some("summary".into()),
"`summary` must be one line (contains a newline)".into(),
Some("collapse the summary to a single line".into()),
vec![],
);
} else if s.chars().count() > MAX_SUMMARY_LEN {
push(
issues,
Severity::Warning,
codes::SUMMARY_TOO_LONG,
rel,
line,
Some("summary".into()),
format!(
"`summary` is {} chars (> {MAX_SUMMARY_LEN})",
s.chars().count()
),
Some(format!("trim the summary to ≤ {MAX_SUMMARY_LEN} chars")),
vec![],
);
}
}
}
}
/// Checks every wiki-link found in the file body.
///
/// Each link's line is offset by `fm_end_line` before being reported; body links carry no
/// frontmatter key.
fn check_body_wiki_links(
    store: &Store,
    rel: &Path,
    body: &str,
    fm_end_line: u32,
    basenames: Option<&BasenameIndex>,
    issues: &mut Vec<Issue>,
) {
    extract_wiki_links(body).into_iter().for_each(|link| {
        let file_line = Some(fm_end_line + link.line);
        check_wiki_link(store, rel, &link, file_line, None, basenames, issues);
    });
}
/// File stem (file name without its final extension) → every path carrying that stem.
type BasenameIndex = HashMap<String, Vec<PathBuf>>;

/// Groups `files` by file stem, preserving input order within each group.
///
/// Paths without a stem, or whose stem is not valid UTF-8, are left out.
fn build_basename_index(files: &[PathBuf]) -> BasenameIndex {
    files.iter().fold(BasenameIndex::new(), |mut by_stem, path| {
        if let Some(stem) = path.file_stem().and_then(|os| os.to_str()) {
            by_stem.entry(stem.to_owned()).or_default().push(path.clone());
        }
        by_stem
    })
}
/// Validates one wiki-link and records at most one "form" issue plus, for full-path links,
/// an optional extension warning and a broken-target error.
///
/// * `line` / `key` – location reported with the issue (`key` is `None` for body links).
/// * `basenames` – when present, enables ambiguity detection for slash-less short links.
fn check_wiki_link(
store: &Store,
rel: &Path,
link: &Link,
line: Option<u32>,
key: Option<&str>,
basenames: Option<&BasenameIndex>,
issues: &mut Vec<Issue>,
) {
// `trim_end_matches` removes every trailing `.md`, so `a.md.md` also becomes `a`.
let bare = link.target.trim_end_matches(".md");
if !is_full_store_path(bare) {
// A slash-less link whose stem matches two or more files is ambiguous; that is
// reported instead of the generic short-form error.
if !bare.contains('/') {
if let Some(idx) = basenames {
if let Some(matches) = idx.get(bare) {
if matches.len() >= 2 {
let mut related = matches.clone();
related.sort();
push(
issues,
Severity::Error,
codes::WIKI_LINK_AMBIGUOUS,
rel,
line,
key.map(str::to_string),
format!(
"short-form wiki-link `[[{}]]` matches multiple files",
link.target
),
Some("use the full store-relative path to disambiguate".into()),
related,
);
return;
}
}
}
}
push(
issues,
Severity::Error,
codes::WIKI_LINK_SHORT_FORM,
rel,
line,
key.map(str::to_string),
format!(
"wiki-link `[[{}]]` is not a full store-relative path",
link.target
),
short_form_suggestion(bare),
vec![],
);
// Short-form links are not checked for extension or existence.
return;
}
if link.target.ends_with(".md") {
push(
issues,
Severity::Warning,
codes::WIKI_LINK_HAS_EXTENSION,
rel,
line,
key.map(str::to_string),
format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
Some(format!("drop the extension: [[{bare}]]")),
vec![],
);
}
// The target must exist as `<root>/<bare>.md`.
let target_abs = store.root.join(format!("{bare}.md"));
if !target_abs.is_file() {
push(
issues,
Severity::Error,
codes::WIKI_LINK_BROKEN,
rel,
line,
key.map(str::to_string),
format!("wiki-link target `{bare}` doesn't exist"),
None,
vec![],
);
}
}
/// Returns the schema that applies to `type_`: a clone of the store's configured schema when
/// one exists, otherwise the built-in implicit schema (if any) for that type.
fn effective_schema(store: &Store, type_: &str) -> Option<Schema> {
    store
        .config
        .schemas
        .get(type_)
        .cloned()
        .or_else(|| implicit_canonical_schema(type_))
}
/// Names of the fields that the effective schema for `type_` declares with `Shape::Date`.
///
/// Returns an empty set when the type has no effective schema.
fn schema_shaped_date_fields(store: &Store, type_: &str) -> BTreeSet<String> {
    let mut date_fields = BTreeSet::new();
    if let Some(schema) = effective_schema(store, type_) {
        for spec in &schema.fields {
            if matches!(spec.shape, Some(Shape::Date)) {
                date_fields.insert(spec.name.clone());
            }
        }
    }
    date_fields
}
/// Built-in fallback schema for the types `contact`, `expense`, `meeting` and `invoice`.
///
/// Each schema consists solely of optional link fields, every one constrained to a folder
/// prefix. Any other type yields `None`.
fn implicit_canonical_schema(type_: &str) -> Option<Schema> {
    // (field name, required link prefix) pairs per type.
    let link_targets: &[(&str, &str)] = match type_ {
        "contact" => &[("company", "records/companies/")],
        "expense" => &[
            ("vendor", "records/companies/"),
            ("contact", "records/contacts/"),
        ],
        "meeting" => &[("expense", "records/expenses/")],
        "invoice" => &[("vendor", "records/companies/")],
        _ => return None,
    };

    let fields: Vec<FieldSpec> = link_targets
        .iter()
        .map(|&(name, prefix)| FieldSpec {
            name: name.to_string(),
            required: false,
            shape: None,
            link_prefix: Some(PathBuf::from(prefix)),
            default: None,
            enum_values: None,
            unknown_modifiers: vec![],
        })
        .collect();

    Some(Schema { fields })
}
/// Validates the frontmatter `fm` against every field spec in `schema`.
///
/// For each spec:
/// * a value that is absent, an explicit YAML null, or a blank scalar counts as "empty";
///   an empty required field is reported as `SCHEMA_MISSING_REQUIRED`, an empty optional
///   field is skipped;
/// * otherwise exactly one of the following is applied, in this priority: link-prefix check,
///   enum membership check (scalar values only), shape check.
///
/// `fm_yaml` is the raw frontmatter text, used for line lookups and link extraction.
fn check_schema(
    store: &Store,
    rel: &Path,
    fm: &BTreeMap<String, Value>,
    fm_yaml: &str,
    schema: &Schema,
    issues: &mut Vec<Issue>,
) {
    for spec in &schema.fields {
        let present = fm.get(&spec.name);

        // One emptiness rule for required and optional fields alike. Treating an explicit
        // null (`key:`) as empty keeps a required field from slipping past the check.
        let is_empty = match present {
            None => true,
            Some(v) => {
                v.is_null()
                    || scalar_string(v)
                        .map(|s| s.trim().is_empty())
                        .unwrap_or(false)
            }
        };
        if is_empty {
            if spec.required {
                push(
                    issues,
                    Severity::Error,
                    codes::SCHEMA_MISSING_REQUIRED,
                    rel,
                    fm_key_line_or_top(fm_yaml, &spec.name),
                    Some(spec.name.clone()),
                    format!("required field `{}` is absent or empty", spec.name),
                    Some(format!("set `{}` to a non-empty value", spec.name)),
                    vec![],
                );
            }
            continue;
        }
        // `is_empty` is true whenever the key is absent, so this never takes the `else`.
        let Some(value) = present else { continue };
        let line = fm_key_line(fm_yaml, &spec.name);

        // Link fields are validated from the raw YAML text, not from the parsed value.
        if let Some(prefix) = &spec.link_prefix {
            check_schema_link(store, rel, &spec.name, fm_yaml, prefix, line, issues);
            continue;
        }

        // Enum fields: only scalar values are compared; enum specs never get a shape check.
        if let Some(allowed) = &spec.enum_values {
            if let Some(s) = scalar_string(value) {
                if !allowed.iter().any(|a| a == &s) {
                    push(
                        issues,
                        Severity::Error,
                        codes::SCHEMA_ENUM_VIOLATION,
                        rel,
                        line,
                        Some(spec.name.clone()),
                        format!("`{}` value {s:?} not in enum {allowed:?}", spec.name),
                        Some(format!("use one of: {}", allowed.join(", "))),
                        vec![],
                    );
                }
            }
            continue;
        }

        if let Some(shape) = spec.shape {
            check_schema_shape(rel, &spec.name, value, shape, line, issues);
        }
    }
}
/// Validates a schema field that must hold wiki-link(s) under `prefix`.
///
/// Links are extracted from the raw frontmatter text for `field`. If none are found, the
/// value is reported as a plain string. Otherwise each link must lie under `prefix`, and a
/// link that does must also point at an existing `<root>/<target>.md` file.
///
/// All issues are reported at `line` (the caller passes the field key's line), not at the
/// individual link's line.
fn check_schema_link(
store: &Store,
rel: &Path,
field: &str,
fm_yaml: &str,
prefix: &Path,
line: Option<u32>,
issues: &mut Vec<Issue>,
) {
// Normalise the prefix to have no trailing slash; messages re-add it explicitly.
let prefix_str = prefix.to_string_lossy();
let prefix_str = prefix_str.trim_end_matches('/');
let suggestion = |target_leaf: &str| {
Some(format!(
"expected `link to {prefix_str}/`; replace with [[{prefix_str}/{target_leaf}]]"
))
};
let links = frontmatter_links_for_key(fm_yaml, field, 2);
if links.is_empty() {
// No wiki-link syntax at all: strip surrounding quotes/whitespace from the raw value
// and derive a suggested leaf name from it with `slugish`.
let raw = frontmatter_raw_value_for_key(fm_yaml, field, 2).unwrap_or_default();
let raw = raw.trim().trim_matches('"').trim_matches('\'').trim();
let leaf = slugish(raw);
push(
issues,
Severity::Error,
codes::SCHEMA_LINK_PREFIX_MISMATCH,
rel,
line,
Some(field.to_string()),
format!(
"`{field}` is a plain string {raw:?}, expected a wiki-link under `{prefix_str}/`"
),
suggestion(&leaf),
vec![],
);
return;
}
for link in links {
let bare = link.target.trim_end_matches(".md");
if !path_under_prefix(bare, prefix_str) {
// Suggest the same leaf (last path segment) relocated under the expected prefix.
let leaf = bare.rsplit('/').next().unwrap_or(bare);
push(
issues,
Severity::Error,
codes::SCHEMA_LINK_PREFIX_MISMATCH,
rel,
line,
Some(field.to_string()),
format!("`{field}` target `{bare}` is not under `{prefix_str}/`"),
suggestion(leaf),
vec![],
);
} else {
// Existence is only checked for links that already satisfy the prefix.
let target_abs = store.root.join(format!("{bare}.md"));
if !target_abs.is_file() {
push(
issues,
Severity::Error,
codes::WIKI_LINK_BROKEN,
rel,
line,
Some(field.to_string()),
format!("wiki-link target `{bare}` doesn't exist"),
None,
vec![],
);
}
}
}
}
/// Checks that `value` matches the declared `shape` and reports `SCHEMA_SHAPE_MISMATCH`
/// otherwise.
///
/// The value is compared through its scalar string form (empty when it has none). `Int` and
/// `Bool` additionally accept native YAML integers / booleans; `String` accepts anything.
fn check_schema_shape(
    rel: &Path,
    field: &str,
    value: &Value,
    shape: Shape,
    line: Option<u32>,
    issues: &mut Vec<Issue>,
) {
    let s = scalar_string(value).unwrap_or_default();
    let conforms = match shape {
        Shape::String => true,
        Shape::Int => {
            value.is_i64() || value.is_u64() || s.trim().parse::<i64>().is_ok()
        }
        Shape::Bool => {
            let text = s.trim();
            value.is_bool() || text == "true" || text == "false"
        }
        Shape::Date => is_iso8601_date_or_datetime(&s),
        Shape::Email => is_email(&s),
        Shape::Currency => is_currency(&s),
        Shape::Url => is_url(&s),
    };
    if conforms {
        return;
    }
    push(
        issues,
        Severity::Error,
        codes::SCHEMA_SHAPE_MISMATCH,
        rel,
        line,
        Some(field.to_string()),
        format!("`{field}` value {s:?} doesn't match shape {shape:?}"),
        Some(shape_suggestion(shape)),
        vec![],
    );
}
/// Detects duplicate records across all parsed files.
///
/// Two kinds of duplicates are reported:
/// * hard duplicates – more than one file declaring the same non-blank `id` (`DUP_ID`, error);
/// * soft duplicates – records of one type sharing a type-specific dedup key (warnings,
///   delegated to `soft_dup`).
fn check_duplicates(parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
// Raw frontmatter per file, so issues can be anchored to the offending key's line.
let fm_yaml_of: HashMap<&PathBuf, &str> = parsed
.iter()
.map(|(rel, p)| (rel, p.fm_yaml.as_str()))
.collect();
// Group files by `id`; blank ids are ignored. The id is used as-is (no case folding).
let mut by_id: HashMap<String, Vec<PathBuf>> = HashMap::new();
for (rel, p) in parsed {
if let Some(map) = &p.fm {
if let Some(id) = map.get("id").and_then(scalar_string) {
if !id.trim().is_empty() {
by_id.entry(id).or_default().push(rel.clone());
}
}
}
}
for (id, files) in &by_id {
if files.len() > 1 {
// One issue per group, reported on the lexicographically first path.
let (reported, related) = canonical_and_related(files);
let line = fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, "id"));
push(
issues,
Severity::Error,
codes::DUP_ID,
&reported,
line,
Some("id".into()),
format!("id {id:?} is declared by more than one file"),
Some("give each file a unique `id` (or drop it to derive from the path)".into()),
related,
);
}
}
// Scalar frontmatter field, trimmed and lowercased for comparison.
let field = |p: &Parsed, k: &str| -> Option<String> {
p.fm.as_ref()
.and_then(|m| m.get(k))
.and_then(scalar_string)
.map(|s| s.trim().to_lowercase())
};
// First wiki-link target for the key (trailing `.md` stripped, lowercased); falls back
// to the scalar value when the field holds no link.
let link_or_scalar = |p: &Parsed, k: &str| -> Option<String> {
if let Some(link) = frontmatter_links_for_key(&p.fm_yaml, k, 2)
.into_iter()
.next()
{
return Some(link.target.trim_end_matches(".md").to_lowercase());
}
field(p, k)
};
// Contacts: same email.
soft_dup(
parsed,
issues,
"contact",
codes::DUP_CONTACT_EMAIL,
Some("email"),
&fm_yaml_of,
|p| field(p, "email").map(|e| vec![e]),
);
// Companies: same domain.
soft_dup(
parsed,
issues,
"company",
codes::DUP_COMPANY_DOMAIN,
Some("domain"),
&fm_yaml_of,
|p| field(p, "domain").map(|d| vec![d]),
);
// Expenses: same (date, amount, vendor); a record missing any part has no key.
soft_dup(
parsed,
issues,
"expense",
codes::DUP_EXPENSE_TUPLE,
None,
&fm_yaml_of,
|p| {
Some(vec![
field(p, "date")?,
field(p, "amount")?,
link_or_scalar(p, "vendor")?,
])
},
);
// Invoices: same (vendor, date, amount).
soft_dup(
parsed,
issues,
"invoice",
codes::DUP_INVOICE_TUPLE,
None,
&fm_yaml_of,
|p| {
Some(vec![
link_or_scalar(p, "vendor")?,
field(p, "date")?,
field(p, "amount")?,
])
},
);
// Emails: same (from, subject, date).
soft_dup(
parsed,
issues,
"email",
codes::DUP_EMAIL_REINGEST,
None,
&fm_yaml_of,
|p| {
Some(vec![
field(p, "from")?,
field(p, "subject")?,
field(p, "date")?,
])
},
);
// Meetings: same date and same attendees key (as computed by `meeting_attendees_key`).
soft_dup(
parsed,
issues,
"meeting",
codes::DUP_MEETING_TUPLE,
None,
&fm_yaml_of,
|p| {
let date = field(p, "date")?;
let attendees = meeting_attendees_key(p)?;
Some(vec![date, attendees])
},
);
}
/// Reports groups of records of type `type_` that share the same dedup key.
///
/// * `code` – issue code to emit (always with `Severity::Warning`).
/// * `anchor_field` – when set, the issue is anchored to that key's line in the reported
///   file and carries the key name; when `None`, the issue is anchored to line 1 with no key.
/// * `fm_yaml_of` – raw frontmatter per file, used for the line lookup.
/// * `key_of` – computes a record's dedup key; records for which it returns `None` are not
///   grouped.
///
/// One issue is emitted per group of two or more files, on the lexicographically first
/// path, with the remaining paths listed as related.
#[allow(clippy::too_many_arguments)]
fn soft_dup(
parsed: &[(PathBuf, Parsed)],
issues: &mut Vec<Issue>,
type_: &str,
code: &'static str,
anchor_field: Option<&str>,
fm_yaml_of: &HashMap<&PathBuf, &str>,
key_of: impl Fn(&Parsed) -> Option<Vec<String>>,
) {
let mut groups: HashMap<Vec<String>, Vec<PathBuf>> = HashMap::new();
for (rel, p) in parsed {
// Only records whose frontmatter `type` equals `type_` exactly take part.
let is_type =
p.fm.as_ref()
.and_then(|m| m.get("type"))
.and_then(scalar_string)
.map(|t| t == type_)
.unwrap_or(false);
if !is_type {
continue;
}
if let Some(key) = key_of(p) {
groups.entry(key).or_default().push(rel.clone());
}
}
for files in groups.values() {
if files.len() > 1 {
let (reported, related) = canonical_and_related(files);
let (line, key) = match anchor_field {
Some(f) => (
fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, f)),
Some(f.to_string()),
),
None => (Some(1), None),
};
push(
issues,
Severity::Warning,
code,
&reported,
line,
key,
format!(
"{type_} record shares its dedup key with {} other record(s)",
related.len()
),
Some("merge with `dbmd rename`, or cross-link with `dbmd link`".into()),
related,
);
}
}
}
/// Splits a duplicate group into the path to report on and the remaining paths.
///
/// The paths are sorted; the smallest becomes the reported path and the rest are returned in
/// sorted order.
///
/// # Panics
///
/// Panics if `files` is empty.
fn canonical_and_related(files: &[PathBuf]) -> (PathBuf, Vec<PathBuf>) {
    let mut ordered: Vec<PathBuf> = files.iter().cloned().collect();
    ordered.sort();
    let mut remaining = ordered.into_iter();
    let reported = remaining
        .next()
        .expect("canonical_and_related requires at least one path");
    (reported, remaining.collect())
}
/// Checks that the store's index files exist where required, are consistent, and that no
/// index file sits in a folder that should not have one.
///
/// `files` is the list of store-relative files to account for. Requirements:
/// * a root `index.md` whenever `files` is non-empty;
/// * an `index.md` in each of `sources/`, `records/`, `wiki/` that has at least one file;
/// * an `index.md` plus an `index.jsonl` in every type folder (as decided by
///   `type_folder_of`) that has at least one file.
fn check_indexes(store: &Store, files: &[PathBuf], issues: &mut Vec<Issue>) {
let mut type_folders: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
let mut layers_present: BTreeSet<&'static str> = BTreeSet::new();
for rel in files {
// The first path component names the layer; unknown top-level folders are ignored.
if let Some(layer) = rel.iter().next().and_then(|s| s.to_str()) {
match layer {
"sources" => layers_present.insert("sources"),
"records" => layers_present.insert("records"),
"wiki" => layers_present.insert("wiki"),
_ => false,
};
}
if let Some(tf) = type_folder_of(rel) {
type_folders.entry(tf).or_default().push(rel.clone());
}
}
// Root index.
if !files.is_empty() {
let root_index = store.root.join("index.md");
if !root_index.is_file() {
push(
issues,
Severity::Error,
codes::INDEX_MISSING,
Path::new("index.md"),
None,
None,
"store has files but no root `index.md`".into(),
Some("run `dbmd index rebuild`".into()),
vec![],
);
} else {
check_index_scope(store, Path::new("index.md"), "root", None, issues);
}
}
// Layer indexes.
for layer in &layers_present {
let layer_index_rel = PathBuf::from(layer).join("index.md");
let abs = store.root.join(&layer_index_rel);
if !abs.is_file() {
push(
issues,
Severity::Error,
codes::INDEX_MISSING,
&layer_index_rel,
None,
None,
format!("layer `{layer}/` has files but no `index.md`"),
Some("run `dbmd index rebuild`".into()),
vec![],
);
} else {
check_index_scope(store, &layer_index_rel, "layer", Some(layer), issues);
}
}
// Type-folder indexes: `index.md` and its `index.jsonl` twin.
for (tf, members) in &type_folders {
let index_md_rel = tf.join("index.md");
let index_md_abs = store.root.join(&index_md_rel);
let index_md_present = index_md_abs.is_file();
if !index_md_present {
// Reported against the folder itself; the remaining checks for this folder
// (including the `index.jsonl` ones) are skipped.
push(
issues,
Severity::Error,
codes::INDEX_MISSING,
tf,
None,
None,
format!("non-empty folder `{}` has no index.md", tf.display()),
Some(format!(
"run `dbmd index rebuild --folder {}`",
tf.display()
)),
vec![],
);
continue;
}
check_index_scope(store, &index_md_rel, "type-folder", tf.to_str(), issues);
check_type_folder_index_md(store, tf, &index_md_rel, members, issues);
let jsonl_rel = tf.join("index.jsonl");
let jsonl_abs = store.root.join(&jsonl_rel);
if !jsonl_abs.is_file() {
push(
issues,
Severity::Error,
codes::INDEX_JSONL_MISSING,
&jsonl_rel,
None,
None,
format!("type-folder `{}/` has no `index.jsonl` twin", tf.display()),
Some("run `dbmd index rebuild`".into()),
vec![],
);
} else {
check_type_folder_index_jsonl(store, tf, &jsonl_rel, members, issues);
}
}
// Orphans: an index file is canonical only in the root, a layer folder, or a type
// folder that has files.
for rel in walk_index_files(&store.root) {
let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
let parent_str = parent.to_string_lossy().to_string();
let is_canonical = parent_str.is_empty() || matches!(parent_str.as_str(), "sources" | "records" | "wiki")
|| type_folders.contains_key(&parent);
if !is_canonical {
push(
issues,
Severity::Warning,
codes::INDEX_ORPHAN,
&rel,
None,
None,
format!(
"`{}` sits in an empty or non-canonical folder",
rel.display()
),
Some("remove it, or run `dbmd index rebuild`".into()),
vec![],
);
}
}
}
/// Cross-checks a type folder's `index.md` against the files in that folder.
///
/// * `index_rel` – store-relative path of the `index.md` (issues are reported against it).
/// * `members` – files belonging to the folder.
/// * `tf` – the type folder; currently unused by the body.
///
/// Reports entries pointing at missing files, entries whose text differs from the target's
/// `summary`, and content files that are not listed. An unreadable index is skipped
/// silently.
fn check_type_folder_index_md(
store: &Store,
tf: &Path,
index_rel: &Path,
members: &[PathBuf],
issues: &mut Vec<Issue>,
) {
let abs = store.root.join(index_rel);
let Ok(text) = std::fs::read_to_string(&abs) else {
return;
};
let entries = parse_index_entries(&text);
// Listed targets, normalised to extension-less paths for the membership test below.
let listed: BTreeSet<PathBuf> = entries
.iter()
.map(|e| PathBuf::from(e.target.trim_end_matches(".md")))
.collect();
for entry in &entries {
let bare = entry.target.trim_end_matches(".md");
let target_abs = store.root.join(format!("{bare}.md"));
if !target_abs.is_file() {
push(
issues,
Severity::Error,
codes::INDEX_STALE_ENTRY,
index_rel,
Some(entry.line),
None,
format!("index entry `[[{bare}]]` points at a missing file"),
Some("run `dbmd index rebuild`".into()),
vec![PathBuf::from(format!("{bare}.md"))],
);
continue;
}
// Summary comparison only happens when both the file's summary and the entry's
// text are available; surrounding whitespace is ignored.
if let Some(expected) = read_summary(&target_abs) {
if let Some(text_part) = &entry.summary_text {
if text_part.trim() != expected.trim() {
push(
issues,
Severity::Error,
codes::INDEX_SUMMARY_MISMATCH,
index_rel,
Some(entry.line),
None,
format!("index entry for `{bare}` text doesn't match the file's `summary`"),
Some("run `dbmd index rebuild`".into()),
vec![PathBuf::from(format!("{bare}.md"))],
);
}
}
}
}
let content_members: Vec<&PathBuf> = members.iter().filter(|m| is_content_file(m)).collect();
// Completeness is only enforced for folders with at most 500 content files.
// NOTE(review): presumably larger folders carry a truncated `index.md` — confirm.
if content_members.len() <= 500 {
for m in content_members {
let bare = PathBuf::from(m.to_string_lossy().trim_end_matches(".md").to_string());
if !listed.contains(&bare) {
push(
issues,
Severity::Error,
codes::INDEX_MISSING_ENTRY,
index_rel,
None,
None,
format!(
"file `{}` is not listed in its folder's `index.md`",
m.display()
),
Some("run `dbmd index rebuild`".into()),
vec![(*m).clone()],
);
}
}
}
// Silences the unused-parameter warning for `tf`.
let _ = tf;
}
/// Cross-checks a type folder's `index.jsonl` against the files in that folder.
///
/// * `jsonl_rel` – store-relative path of the `index.jsonl` (issues are reported against it).
/// * `members` – files belonging to the folder.
/// * `tf` – the type folder; currently unused by the body.
///
/// Reports, in this order: lines that are not valid JSON, records pointing at missing files,
/// content files with no record (all `INDEX_JSONL_DESYNC`), and records whose fields differ
/// from what `IndexRecord::expected_from_file` produces (`INDEX_JSONL_STALE`). An unreadable
/// file is skipped silently.
fn check_type_folder_index_jsonl(
store: &Store,
tf: &Path,
jsonl_rel: &Path,
members: &[PathBuf],
issues: &mut Vec<Issue>,
) {
let abs = store.root.join(jsonl_rel);
let Ok(text) = std::fs::read_to_string(&abs) else {
return;
};
// Records keyed by their `path` field. A later line with the same path replaces an
// earlier one; records without a string `path` are dropped.
let mut records: BTreeMap<PathBuf, serde_json::Value> = BTreeMap::new();
for (i, line) in text.lines().enumerate() {
let line = line.trim();
if line.is_empty() {
continue;
}
let rec: serde_json::Value = match serde_json::from_str(line) {
Ok(v) => v,
Err(e) => {
push(
issues,
Severity::Error,
codes::INDEX_JSONL_DESYNC,
jsonl_rel,
Some((i + 1) as u32),
None,
format!("`index.jsonl` line {} is not valid JSON: {e}", i + 1),
Some("run `dbmd index rebuild`".into()),
vec![],
);
continue;
}
};
if let Some(path) = rec.get("path").and_then(|v| v.as_str()) {
records.insert(PathBuf::from(path), rec);
}
}
let member_set: BTreeSet<PathBuf> = members
.iter()
.filter(|m| is_content_file(m))
.cloned()
.collect();
// Records whose target file no longer exists.
for path in records.keys() {
let target_abs = store.root.join(path);
if !target_abs.is_file() {
push(
issues,
Severity::Error,
codes::INDEX_JSONL_DESYNC,
jsonl_rel,
None,
None,
format!(
"`index.jsonl` record points at missing file `{}`",
path.display()
),
Some("run `dbmd index rebuild`".into()),
vec![],
);
}
}
// Content files with no record (no size cap here, unlike the `index.md` check).
for m in &member_set {
if !records.contains_key(m) {
push(
issues,
Severity::Error,
codes::INDEX_JSONL_DESYNC,
jsonl_rel,
None,
None,
format!(
"file `{}` is missing from the complete `index.jsonl`",
m.display()
),
Some("run `dbmd index rebuild`".into()),
vec![m.clone()],
);
}
}
// Staleness: compare each record with a freshly computed one, key by key.
for (path, rec) in &records {
let target_abs = store.root.join(path);
if !target_abs.is_file() {
// Already reported above as a missing target.
continue;
}
// Files for which no expected record can be built or serialised are skipped.
let Ok(expected) = crate::index::IndexRecord::expected_from_file(&target_abs, path.clone())
else {
continue; };
let Ok(expected_json) = serde_json::to_value(&expected) else {
continue;
};
let (Some(have), Some(want)) = (rec.as_object(), expected_json.as_object()) else {
continue;
};
// Union of keys from both sides, so a key present on only one side counts as a
// mismatch; `path` is excluded because it is the lookup key.
let mut mismatched_keys: BTreeSet<&str> = BTreeSet::new();
for key in have.keys().chain(want.keys()) {
if key == "path" {
continue;
}
if have.get(key) != want.get(key) {
mismatched_keys.insert(key);
}
}
if !mismatched_keys.is_empty() {
let keys: Vec<&str> = mismatched_keys.into_iter().collect();
push(
issues,
Severity::Error,
codes::INDEX_JSONL_STALE,
jsonl_rel,
None,
Some(keys.join(",")),
format!(
"`index.jsonl` record for `{}` is stale ({})",
path.display(),
keys.join(", ")
),
Some("run `dbmd index rebuild`".into()),
vec![path.clone()],
);
}
}
// Silences the unused-parameter warning for `tf`.
let _ = tf;
}
/// Checks that an index file's frontmatter `scope` (and optionally `folder`) matches where
/// the file lives.
///
/// * `expected_scope` – scope implied by the location; for `"type-folder"`, the value
///   `"folder"` is accepted as well.
/// * `expected_folder` – when given, the `folder` field must equal it (trailing slashes are
///   ignored on both sides).
///
/// The check is best-effort: an unreadable file, missing frontmatter, frontmatter that is
/// not a YAML mapping, or an absent field produces no issue.
fn check_index_scope(
store: &Store,
index_rel: &Path,
expected_scope: &str,
expected_folder: Option<&str>,
issues: &mut Vec<Issue>,
) {
let abs = store.root.join(index_rel);
let Ok(text) = std::fs::read_to_string(&abs) else {
return;
};
let Some((yaml, _, _)) = split_frontmatter(&text) else {
return;
};
let Ok(Value::Mapping(map)) = serde_yml::from_str::<Value>(&yaml) else {
return;
};
let fm = yaml_map_to_btree(&map);
if let Some(scope) = fm.get("scope").and_then(scalar_string) {
let scope_ok =
scope == expected_scope || (expected_scope == "type-folder" && scope == "folder");
if !scope_ok {
push(
issues,
Severity::Warning,
codes::INDEX_WRONG_SCOPE,
index_rel,
fm_key_line(&yaml, "scope"),
Some("scope".into()),
format!(
"index `scope: {scope}` doesn't match location (expected `{expected_scope}`)"
),
Some(format!("set `scope: {expected_scope}`")),
vec![],
);
}
}
// A wrong `folder` is reported under the same code as a wrong `scope`.
if let Some(expected) = expected_folder {
if let Some(folder) = fm.get("folder").and_then(scalar_string) {
if folder.trim_end_matches('/') != expected.trim_end_matches('/') {
push(
issues,
Severity::Warning,
codes::INDEX_WRONG_SCOPE,
index_rel,
fm_key_line(&yaml, "folder"),
Some("folder".into()),
format!("index `folder: {folder}` doesn't match location `{expected}`"),
Some(format!("set `folder: {expected}`")),
vec![],
);
}
}
}
}
/// Validates the entry headers of the store's `log.md`.
///
/// Only lines starting with `## [` are examined. A header that `parse_log_header` rejects is
/// reported as `LOG_BAD_TIMESTAMP`; a parsed header is checked for a recognized kind and for
/// being no older than the previously parsed header. A missing or unreadable `log.md`
/// produces no issue.
fn check_log(store: &Store, issues: &mut Vec<Issue>) {
    let log_rel = Path::new("log.md");
    let Ok(text) = std::fs::read_to_string(store.root.join(log_rel)) else {
        return;
    };

    // Timestamp of the most recent header that parsed; unparseable headers leave it as-is.
    let mut prev: Option<DateTime<FixedOffset>> = None;

    for (idx, line) in text.lines().enumerate() {
        if !line.starts_with("## [") {
            continue;
        }
        let line_no = (idx + 1) as u32;

        let Some((ts, kind, _object)) = parse_log_header(line) else {
            push(
                issues,
                Severity::Error,
                codes::LOG_BAD_TIMESTAMP,
                log_rel,
                Some(line_no),
                None,
                format!("log entry header has an unparseable timestamp: {line:?}"),
                Some("use `## [YYYY-MM-DD HH:MM] <kind> | <object>`".into()),
                vec![],
            );
            continue;
        };

        let kind_known = RECOGNIZED_LOG_KINDS.iter().any(|known| *known == kind.as_str());
        if !kind_known {
            push(
                issues,
                Severity::Warning,
                codes::LOG_UNKNOWN_KIND,
                log_rel,
                Some(line_no),
                None,
                format!("log entry kind `{kind}` is not recognized"),
                None,
                vec![],
            );
        }

        let goes_backwards = matches!(prev, Some(p) if ts < p);
        if goes_backwards {
            push(
                issues,
                Severity::Warning,
                codes::LOG_OUT_OF_ORDER,
                log_rel,
                Some(line_no),
                None,
                "log entry is older than the entry above it (possible rewrite)".into(),
                Some("append corrective entries; never reorder past ones".into()),
                vec![],
            );
        }

        // Always advance, even after an out-of-order entry, so each header is compared
        // with its immediate predecessor.
        prev = Some(ts);
    }
}
/// A wiki-link occurrence found in frontmatter or body text.
struct Link {
/// Link target as written; may still carry a trailing `.md`, which callers strip.
target: String,
/// Line of the occurrence. For body links it is added to the frontmatter end line by
/// `check_body_wiki_links`; frontmatter links are reported with it unchanged.
line: u32,
}
/// Reports whether the store root holds a `DB.md` marker file.
///
/// Beyond `is_file`, the root's directory listing must contain an entry named exactly
/// `DB.md`. If the directory cannot be listed, the `is_file` result alone is trusted.
fn store_marker_present(store: &Store) -> bool {
    if !store.root.join("DB.md").is_file() {
        return false;
    }
    let Ok(entries) = std::fs::read_dir(&store.root) else {
        return true;
    };
    // Entries that fail to read are skipped; the name comparison is exact.
    for entry in entries.flatten() {
        if entry.file_name().to_str() == Some("DB.md") {
            return true;
        }
    }
    false
}
/// Validates the structure of the store's root `DB.md`.
///
/// Checks performed, in order:
/// 1. The file has a frontmatter block that parses as a YAML mapping (or is
///    empty) and declares `type: db-md` (`DB_MD_BAD_TYPE` otherwise).
/// 2. `scope` and `owner` are present as non-blank scalars
///    (`DB_MD_MISSING_FIELD`, one issue per field).
/// 3. Every level-2 heading in the body is one of `Agent instructions`,
///    `Policies`, `Schemas`, compared case-insensitively
///    (`DB_MD_UNKNOWN_SECTION` warning otherwise).
///
/// When the frontmatter is absent, only the frontmatter issues are reported
/// and the body is not scanned. An unreadable `DB.md` produces no issues.
fn check_db_md(store: &Store, issues: &mut Vec<Issue>) {
    /// Frontmatter fields that must carry a non-blank scalar value.
    const REQUIRED_FIELDS: [&str; 2] = ["scope", "owner"];

    /// Records a `DB_MD_BAD_TYPE` error keyed on `type`.
    fn report_bad_type(
        issues: &mut Vec<Issue>,
        file: &Path,
        line: Option<u32>,
        message: String,
        suggestion: &str,
    ) {
        push(
            issues,
            Severity::Error,
            codes::DB_MD_BAD_TYPE,
            file,
            line,
            Some("type".into()),
            message,
            Some(suggestion.into()),
            vec![],
        );
    }

    /// Records a `DB_MD_MISSING_FIELD` error keyed on `field`.
    fn report_missing_field(
        issues: &mut Vec<Issue>,
        file: &Path,
        line: Option<u32>,
        field: &str,
    ) {
        push(
            issues,
            Severity::Error,
            codes::DB_MD_MISSING_FIELD,
            file,
            line,
            Some(field.into()),
            format!("DB.md frontmatter is missing required field `{field}`"),
            Some(format!("add `{field}:` to the DB.md frontmatter")),
            vec![],
        );
    }

    let rel = Path::new("DB.md");
    let Ok(text) = std::fs::read_to_string(store.root.join("DB.md")) else {
        return;
    };

    let Some((fm_yaml, body, fm_end_line)) = split_frontmatter(&text) else {
        report_bad_type(
            issues,
            rel,
            Some(1),
            "DB.md has no frontmatter; it must declare `type: db-md`".into(),
            "add a `---` frontmatter block with `type: db-md`",
        );
        for field in REQUIRED_FIELDS {
            report_missing_field(issues, rel, Some(1), field);
        }
        return;
    };

    // `None` means the frontmatter is not a YAML mapping (or failed to parse).
    let parsed: Option<BTreeMap<String, Value>> = match serde_yml::from_str::<Value>(&fm_yaml) {
        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
        Ok(Value::Null) => Some(BTreeMap::new()),
        _ => None,
    };

    if let Some(map) = &parsed {
        let declared = map.get("type").and_then(scalar_string);
        match declared.as_deref() {
            Some("db-md") => {}
            Some(other) => report_bad_type(
                issues,
                rel,
                fm_key_line(&fm_yaml, "type"),
                format!("DB.md has `type: {other}`; a store's DB.md must be `type: db-md`"),
                "set `type: db-md` in the DB.md frontmatter",
            ),
            None => report_bad_type(
                issues,
                rel,
                Some(1),
                "DB.md frontmatter has no `type:`; it must be `type: db-md`".to_string(),
                "set `type: db-md` in the DB.md frontmatter",
            ),
        }
        for field in REQUIRED_FIELDS {
            let filled = matches!(
                map.get(field).and_then(scalar_string),
                Some(value) if !value.trim().is_empty()
            );
            if !filled {
                report_missing_field(issues, rel, fm_key_line_or_top(&fm_yaml, field), field);
            }
        }
    } else {
        report_bad_type(
            issues,
            rel,
            Some(1),
            "DB.md frontmatter isn't valid YAML; it must declare `type: db-md`".into(),
            "fix the DB.md frontmatter and set `type: db-md`",
        );
        for field in REQUIRED_FIELDS {
            report_missing_field(issues, rel, Some(1), field);
        }
    }

    for section in crate::parser::extract_sections(&body) {
        let heading = section.heading.trim();
        let recognized = matches!(
            heading.to_ascii_lowercase().as_str(),
            "agent instructions" | "policies" | "schemas"
        );
        if section.level != 2 || recognized {
            continue;
        }
        // Section lines are relative to the body; shift by the closing fence line.
        let file_line = fm_end_line + section.line;
        push(
            issues,
            Severity::Warning,
            codes::DB_MD_UNKNOWN_SECTION,
            rel,
            Some(file_line),
            None,
            format!("DB.md has an unrecognized `## {}` section", heading),
            Some(
                "DB.md sections are `## Agent instructions`, `## Policies`, `## Schemas` — \
                 remove or rename this heading"
                    .into(),
            ),
            vec![],
        );
    }
}
/// Builds the `NOT_A_STORE` error for a root directory that has no `DB.md`.
///
/// The issue's `file` is the store root itself; it carries no line or key.
fn not_a_store_issue(store: &Store) -> Issue {
    let root = &store.root;
    let message = format!("{} has no DB.md; not a db.md store", root.display());
    let suggestion = String::from("create a `DB.md` at the store root");
    Issue {
        severity: Severity::Error,
        code: codes::NOT_A_STORE,
        file: root.clone(),
        line: None,
        key: None,
        message,
        suggestion: Some(suggestion),
        related: Vec::new(),
    }
}
/// Returns the layer (`sources`, `records` or `wiki`) that a built-in
/// `type:` value belongs to, or `None` for any other type.
fn canonical_layer_for_type(type_: &str) -> Option<&'static str> {
    // Each layer paired with the built-in types that live in it.
    const LAYER_TYPES: [(&str, &[&str]); 3] = [
        ("sources", &["email", "transcript", "pdf-source"]),
        (
            "records",
            &["contact", "company", "expense", "meeting", "decision", "invoice"],
        ),
        ("wiki", &["wiki-page"]),
    ];
    LAYER_TYPES
        .iter()
        .find(|(_, types)| types.contains(&type_))
        .map(|(layer, _)| *layer)
}
/// Returns the layer name when the first component of `rel` is `sources`,
/// `records` or `wiki`; otherwise `None`.
fn layer_of(rel: &Path) -> Option<&'static str> {
    let head = rel.iter().next()?.to_str()?;
    ["sources", "records", "wiki"]
        .iter()
        .copied()
        .find(|layer| *layer == head)
}
/// Reports whether `rel` names a content file: a `.md` file under one of the
/// three layers that is not `index.md`, `index.jsonl` or `log.md`.
fn is_content_file(rel: &Path) -> bool {
    let in_layer = matches!(
        rel.iter().next().and_then(|c| c.to_str()),
        Some("sources" | "records" | "wiki")
    );
    if !in_layer {
        return false;
    }
    match rel.file_name().and_then(|n| n.to_str()) {
        // Meta files, and names that are absent or not valid UTF-8.
        Some("index.md" | "index.jsonl" | "log.md") | None => false,
        Some(name) => name.ends_with(".md"),
    }
}
/// Splits a document into its `---`-fenced frontmatter and its body.
///
/// Returns `(yaml, body, close_line)` where `yaml` is the text between the
/// fences (each line terminated by `\n`), `body` is every line after the
/// closing fence joined with `\n`, and `close_line` is the 1-based line
/// number of the closing fence. Returns `None` when the first line is not
/// `---` or no closing `---` exists. Trailing whitespace on fence lines is
/// ignored.
fn split_frontmatter(text: &str) -> Option<(String, String, u32)> {
    let all_lines: Vec<&str> = text.lines().collect();
    let (opener, rest) = all_lines.split_first()?;
    if opener.trim_end() != "---" {
        return None;
    }
    // Index of the closing fence within `rest` (i.e. counted from line 2).
    let fence_idx = rest.iter().position(|line| line.trim_end() == "---")?;

    let mut yaml = String::new();
    for line in &rest[..fence_idx] {
        yaml.push_str(line);
        yaml.push('\n');
    }
    let body = rest[fence_idx + 1..].join("\n");
    let close_line = (fence_idx + 2) as u32;
    Some((yaml, body, close_line))
}
fn read_summary(abs: &Path) -> Option<String> {
let text = std::fs::read_to_string(abs).ok()?;
let (yaml, _, _) = split_frontmatter(&text)?;
let value: Value = serde_yml::from_str(&yaml).ok()?;
if let Value::Mapping(m) = value {
m.get(Value::String("summary".into()))
.and_then(scalar_string)
} else {
None
}
}
/// Copies a YAML mapping into a `BTreeMap`, keeping only the entries whose
/// key is a YAML string. Entries with any other key kind are dropped.
fn yaml_map_to_btree(map: &serde_yml::Mapping) -> BTreeMap<String, Value> {
    map.iter()
        .filter_map(|(key, value)| match key {
            Value::String(name) => Some((name.clone(), value.clone())),
            _ => None,
        })
        .collect()
}
/// Renders a YAML string, number or boolean as a `String`.
///
/// Every other value kind (null, sequence, mapping, …) yields `None`.
fn scalar_string(v: &Value) -> Option<String> {
    let rendered = match v {
        Value::String(text) => text.clone(),
        Value::Number(number) => number.to_string(),
        Value::Bool(flag) => flag.to_string(),
        _ => return None,
    };
    Some(rendered)
}
/// Reports whether `v` is a YAML sequence whose items are all scalars that
/// `scalar_string` can render. An empty sequence counts as flat.
fn is_flat_scalar_list(v: &Value) -> bool {
    matches!(
        v,
        Value::Sequence(items) if items.iter().all(|item| scalar_string(item).is_some())
    )
}
/// Lists every wiki link found in the frontmatter, each paired with the
/// top-level key whose block contains it, in document order.
fn frontmatter_link_fields_text(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, Link)> {
    frontmatter_key_blocks(fm_yaml, fm_start_line)
        .into_iter()
        .flat_map(|(key, _raw_value, links)| {
            links.into_iter().map(move |link| (key.clone(), link))
        })
        .collect()
}
/// Returns the wiki links found in the first frontmatter block whose
/// top-level key equals `key`; empty when no such block exists.
fn frontmatter_links_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Vec<Link> {
    frontmatter_key_blocks(fm_yaml, fm_start_line)
        .into_iter()
        .find(|(name, _, _)| name == key)
        .map(|(_, _, links)| links)
        .unwrap_or_default()
}
/// Returns the raw (unparsed) value text of the first frontmatter block whose
/// top-level key equals `key`, or `None` when the key is absent.
fn frontmatter_raw_value_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Option<String> {
    frontmatter_key_blocks(fm_yaml, fm_start_line)
        .into_iter()
        .find(|(name, _, _)| name == key)
        .map(|(_, raw_value, _)| raw_value)
}
/// Groups raw frontmatter text into one block per top-level key.
///
/// Each block is `(key, raw_value_text, links)`:
/// - a block starts on an unindented line that is not a comment (`#`) or a
///   list item (`-`) and that `top_level_key` accepts;
/// - every other line is a continuation of the currently open block; its
///   trimmed text is appended to the value (newline-separated) and its links
///   are collected;
/// - lines that precede the first key are ignored.
///
/// Link line numbers are `fm_start_line` plus the zero-based line offset
/// within `fm_yaml`.
fn frontmatter_key_blocks(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, String, Vec<Link>)> {
    let mut finished = Vec::new();
    let mut open: Option<(String, String, Vec<Link>)> = None;

    for (offset, raw_line) in fm_yaml.lines().enumerate() {
        let file_line = fm_start_line + offset as u32;
        let stripped = raw_line.trim();
        let may_start_block =
            !raw_line.starts_with([' ', '\t']) && !stripped.starts_with(['#', '-']);
        let header = if may_start_block {
            top_level_key(raw_line)
        } else {
            None
        };

        match header {
            Some((key, after)) => {
                // A new key closes whichever block was open.
                finished.extend(open.take());
                let mut links = Vec::new();
                collect_line_links(after, file_line, &mut links);
                open = Some((key, after.trim().to_string(), links));
            }
            None => {
                if let Some((_, value_text, links)) = open.as_mut() {
                    if !value_text.is_empty() {
                        value_text.push('\n');
                    }
                    value_text.push_str(stripped);
                    collect_line_links(raw_line, file_line, links);
                }
            }
        }
    }

    finished.extend(open);
    finished
}
/// Splits `line` at its first `:` into `(key, rest)`.
///
/// The key is trimmed and must be non-empty and consist only of alphanumeric
/// characters, `_` or `-`; otherwise `None` is returned. `rest` is the
/// untouched text after the colon.
fn top_level_key(line: &str) -> Option<(String, &str)> {
    let colon = line.find(':')?;
    let name = line[..colon].trim();
    let well_formed = !name.is_empty()
        && name
            .chars()
            .all(|c| c.is_alphanumeric() || matches!(c, '_' | '-'));
    if well_formed {
        Some((name.to_string(), &line[colon + 1..]))
    } else {
        None
    }
}
/// Appends every `[[target]]` / `[[target|alias]]` link found in `s` to
/// `links`, tagging each with `file_line`.
///
/// The target is the text before the first `|`, with leading `[` characters
/// removed (so a flow-form list such as `[[[a]], [[b]]]` still yields `a`)
/// and surrounding whitespace trimmed. Links with an empty target are
/// skipped.
fn collect_line_links(s: &str, file_line: u32, links: &mut Vec<Link>) {
    let mut remaining = s;
    while let Some(open) = remaining.find("[[") {
        let after_open = &remaining[open + 2..];
        // With no closing `]]` left, no later opener can form a link either.
        let Some(close) = after_open.find("]]") else {
            break;
        };
        let inner = &after_open[..close];
        let target = inner
            .trim_start_matches('[')
            .split('|')
            .next()
            .unwrap_or(inner)
            .trim();
        if !target.is_empty() {
            links.push(Link {
                target: target.to_string(),
                line: file_line,
            });
        }
        remaining = &after_open[close + 2..];
    }
}
/// Extracts every `[[target]]` / `[[target|alias]]` link from a markdown
/// body, skipping fenced code blocks.
///
/// Line numbers are 1-based and relative to `body`. The target is the text
/// before the first `|`, trimmed; links whose target is empty or begins with
/// `[` are ignored.
///
/// A fence opens on a line whose first non-blank characters are a run of at
/// least three backticks or tildes. It closes only on a line that starts
/// with a run of the *same* character, at least as long as the opener, with
/// nothing but whitespace after it. A `~~~` line inside a backtick fence (or
/// a shorter run) is therefore fence content and does not end the block
/// early.
fn extract_wiki_links(body: &str) -> Vec<Link> {
    let mut out = Vec::new();
    // Marker character and run length of the fence currently open, if any.
    let mut open_fence: Option<(char, usize)> = None;

    for (idx, line) in body.lines().enumerate() {
        let trimmed = line.trim_start();
        let marker = trimmed.chars().next().filter(|c| matches!(c, '`' | '~'));
        if let Some(ch) = marker {
            // The marker is ASCII, so the char count is also the byte length.
            let run = trimmed.chars().take_while(|c| *c == ch).count();
            if run >= 3 {
                match open_fence {
                    None => open_fence = Some((ch, run)),
                    Some((open_ch, open_len)) => {
                        let closes = ch == open_ch
                            && run >= open_len
                            && trimmed[run..].trim().is_empty();
                        if closes {
                            open_fence = None;
                        }
                    }
                }
                // Fence delimiters and fence-like content are never scanned.
                continue;
            }
        }
        if open_fence.is_some() {
            continue;
        }

        let line_no = (idx + 1) as u32;
        let mut remaining = line;
        while let Some(open) = remaining.find("[[") {
            let after_open = &remaining[open + 2..];
            // Without a closing `]]` no later opener can form a link either.
            let Some(close) = after_open.find("]]") else {
                break;
            };
            let inner = &after_open[..close];
            let target = inner.split('|').next().unwrap_or(inner).trim();
            if !target.is_empty() && !target.starts_with('[') {
                out.push(Link {
                    target: target.to_string(),
                    line: line_no,
                });
            }
            remaining = &after_open[close + 2..];
        }
    }
    out
}
/// Returns the keys whose inline value starts with `[[[`, i.e. a YAML
/// flow-form list that opens with a wiki link.
///
/// Each line is split at its first `:`. Lines whose trimmed key is empty or
/// starts with `#` or `-` are skipped. Indentation is not considered, so
/// nested keys are reported too.
fn detect_flow_form_link_lists(fm_yaml: &str) -> Vec<String> {
    fm_yaml
        .lines()
        .filter_map(|line| {
            let (key, value) = line.split_once(':')?;
            let key = key.trim();
            let is_key = !key.is_empty() && !key.starts_with(['#', '-']);
            (is_key && value.trim().starts_with("[[[")).then(|| key.to_string())
        })
        .collect()
}
/// Reports whether `bare` is a store-relative path: one of the three layer
/// names, then `/`, then at least one more character.
fn is_full_store_path(bare: &str) -> bool {
    match bare.split_once('/') {
        Some((layer, rest)) => {
            !rest.is_empty() && matches!(layer, "sources" | "records" | "wiki")
        }
        None => false,
    }
}
/// Reports whether `bare` equals `prefix` or lies beneath it on a `/`
/// boundary. Trailing slashes on `prefix` are ignored.
fn path_under_prefix(bare: &str, prefix: &str) -> bool {
    let base = prefix.trim_end_matches('/');
    match bare.strip_prefix(base) {
        // Either an exact match or the next character is a path separator.
        Some(tail) => tail.is_empty() || tail.starts_with('/'),
        None => false,
    }
}
/// Returns `<layer>/<folder>` for a path with at least three UTF-8
/// components whose first component is a layer name.
///
/// Paths directly under a layer (two components or fewer) and paths outside
/// the three layers yield `None`. Components that are not valid UTF-8 are
/// ignored when counting.
fn type_folder_of(rel: &Path) -> Option<PathBuf> {
    let mut parts = rel.iter().filter_map(|component| component.to_str());
    let layer = parts.next()?;
    let folder = parts.next()?;
    // A third component (the file itself or a deeper folder) must exist.
    parts.next()?;
    if matches!(layer, "sources" | "records" | "wiki") {
        Some(Path::new(layer).join(folder))
    } else {
        None
    }
}
/// Collects the store-relative paths of all `.md` files under the three
/// layer directories, except files named `index.md`.
///
/// Entries whose name starts with `.` or equals `log` are pruned together
/// with everything beneath them. Walk errors are skipped silently. The
/// result is sorted.
fn walk_content_files(root: &Path) -> Vec<PathBuf> {
    let mut found: Vec<PathBuf> = Vec::new();
    for layer in ["sources", "records", "wiki"] {
        let layer_dir = root.join(layer);
        if !layer_dir.is_dir() {
            continue;
        }
        let walker = walkdir::WalkDir::new(&layer_dir)
            .into_iter()
            .filter_entry(|entry| match entry.file_name().to_str() {
                Some(name) => !(name.starts_with('.') || name == "log"),
                // Names that are not valid UTF-8 are kept.
                None => true,
            });
        for entry in walker.flatten() {
            let is_content = entry.file_type().is_file()
                && matches!(
                    entry.file_name().to_str(),
                    Some(name) if name.ends_with(".md") && name != "index.md"
                );
            if !is_content {
                continue;
            }
            if let Ok(rel) = entry.path().strip_prefix(root) {
                found.push(rel.to_path_buf());
            }
        }
    }
    found.sort();
    found
}
/// Collects the store-relative paths of every `index.md`: the one at the
/// store root (if present) plus those found anywhere under the three layers.
///
/// Entries whose name starts with `.` or equals `log` are pruned together
/// with everything beneath them. Walk errors are skipped silently. The
/// result is sorted.
fn walk_index_files(root: &Path) -> Vec<PathBuf> {
    let mut found: Vec<PathBuf> = Vec::new();
    if root.join("index.md").is_file() {
        found.push(PathBuf::from("index.md"));
    }
    for layer in ["sources", "records", "wiki"] {
        let layer_dir = root.join(layer);
        if !layer_dir.is_dir() {
            continue;
        }
        let walker = walkdir::WalkDir::new(&layer_dir)
            .into_iter()
            .filter_entry(|entry| match entry.file_name().to_str() {
                Some(name) => !(name.starts_with('.') || name == "log"),
                // Names that are not valid UTF-8 are kept.
                None => true,
            });
        for entry in walker.flatten() {
            let is_index =
                entry.file_type().is_file() && entry.file_name().to_str() == Some("index.md");
            if !is_index {
                continue;
            }
            if let Ok(rel) = entry.path().strip_prefix(root) {
                found.push(rel.to_path_buf());
            }
        }
    }
    found.sort();
    found
}
/// One `- [[target]] …` bullet parsed out of an index file.
struct IndexEntry {
/// Link target: the text inside `[[…]]` before any `|`, trimmed.
target: String,
/// Summary text that follows the link on the same line, if one was found.
summary_text: Option<String>,
/// 1-based line number of the bullet within the parsed text.
line: u32,
}
/// Parses the `- [[target]] …` bullets of an index file.
///
/// Scanning stops at the first line that (after leading whitespace) starts
/// with `## More`; nothing after it is parsed. For every bullet line
/// containing a complete `[[…]]`, the first link becomes the entry target
/// and the text after it is passed to `extract_index_entry_summary`. Line
/// numbers are 1-based within `text`.
fn parse_index_entries(text: &str) -> Vec<IndexEntry> {
    text.lines()
        .enumerate()
        .map(|(idx, line)| (idx, line.trim_start()))
        .take_while(|(_, item)| !item.starts_with("## More"))
        .filter_map(|(idx, item)| {
            if !item.starts_with("- ") {
                return None;
            }
            let inner_start = item.find("[[")? + 2;
            let inner_len = item[inner_start..].find("]]")?;
            let inner = &item[inner_start..inner_start + inner_len];
            let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
            let tail = &item[inner_start + inner_len + 2..];
            Some(IndexEntry {
                target,
                summary_text: extract_index_entry_summary(tail),
                line: (idx + 1) as u32,
            })
        })
        .collect()
}
/// Extracts the summary from the text that follows an index entry's link.
///
/// Accepted shape: an optional leading `(…)` group, then `—` or `-`, then
/// the summary, optionally followed by ` · ` and trailing tags (dropped).
/// Returns `None` when no dash separator is present or the summary is empty.
fn extract_index_entry_summary(after: &str) -> Option<String> {
    let trimmed = after.trim();
    // Skip one leading parenthesised group, but only if it is closed.
    let past_group = trimmed
        .strip_prefix('(')
        .and_then(|inner| inner.split_once(')'))
        .map_or(trimmed, |(_, tail)| tail.trim_start());
    let text = past_group
        .strip_prefix('—')
        .or_else(|| past_group.strip_prefix('-'))?
        .trim();
    if text.is_empty() {
        return None;
    }
    let summary = text
        .split_once(" · ")
        .map_or(text, |(head, _tags)| head.trim());
    Some(summary.to_owned())
}
/// Parses a log entry header of the form `## [YYYY-MM-DD HH:MM] kind | object`.
///
/// Returns `(timestamp, kind, object)`. The timestamp is given a zero UTC
/// offset. `object` is `None` when there is no `|` or nothing follows it.
/// Returns `None` when the prefix or closing `]` is missing, the timestamp
/// does not match `%Y-%m-%d %H:%M`, or the kind is empty.
fn parse_log_header(line: &str) -> Option<(DateTime<FixedOffset>, String, Option<String>)> {
    let (stamp, tail) = line.strip_prefix("## [")?.split_once(']')?;
    let naive = NaiveDateTime::parse_from_str(stamp.trim(), "%Y-%m-%d %H:%M").ok()?;
    let zero_offset = FixedOffset::east_opt(0)?;
    let ts = naive.and_local_timezone(zero_offset).single()?;

    let tail = tail.trim();
    let (kind, object) = match tail.split_once('|') {
        Some((kind, object)) => (
            kind.trim(),
            Some(object.trim()).filter(|text| !text.is_empty()),
        ),
        None => (tail, None),
    };
    if kind.is_empty() {
        return None;
    }
    Some((ts, kind.to_string(), object.map(|text| text.to_string())))
}
fn last_validate_at(store: &Store) -> Option<DateTime<FixedOffset>> {
let text = std::fs::read_to_string(store.root.join("log.md")).ok()?;
let mut latest: Option<DateTime<FixedOffset>> = None;
for line in text.lines() {
if !line.starts_with("## [") {
continue;
}
if let Some((ts, kind, _)) = parse_log_header(line) {
if kind == "validate" {
latest = Some(match latest {
Some(p) if p >= ts => p,
_ => ts,
});
}
}
}
latest
}
/// Collects the objects named by mutating log entries at or after `cutoff`.
///
/// Only entries of kind `create`, `update`, `ingest`, `rename`, `delete` or
/// `link` are considered; with `cutoff == None` every such entry counts.
/// The object text is stripped of `[[`/`]]`, any `|alias`, and trailing
/// `.md`, then returned as `<object>.md`. An unreadable `log.md` yields an
/// empty set.
fn changed_objects_since(
    store: &Store,
    cutoff: Option<DateTime<FixedOffset>>,
) -> BTreeSet<PathBuf> {
    let mut out = BTreeSet::new();
    let Ok(text) = std::fs::read_to_string(store.root.join("log.md")) else {
        return out;
    };
    for line in text.lines().filter(|line| line.starts_with("## [")) {
        let Some((ts, kind, object)) = parse_log_header(line) else {
            continue;
        };
        if matches!(cutoff, Some(limit) if ts < limit) {
            continue;
        }
        let mutating = matches!(
            kind.as_str(),
            "create" | "update" | "ingest" | "rename" | "delete" | "link"
        );
        if !mutating {
            continue;
        }
        let Some(object) = object else {
            continue;
        };
        let bare = object
            .trim()
            .trim_start_matches("[[")
            .trim_end_matches("]]")
            .split('|')
            .next()
            .unwrap_or("")
            .trim()
            .trim_end_matches(".md");
        if !bare.is_empty() {
            out.insert(PathBuf::from(format!("{bare}.md")));
        }
    }
    out
}
/// A derived-from link whose target file has a type listed in the store's
/// ignored types, as reported by `derived_from_ignored_type`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DerivedFromIgnored {
/// The link target exactly as it was supplied by the caller.
pub target: String,
/// The `type` read from the target file's frontmatter.
pub target_type: String,
}
/// Finds the first derived-from target whose file has an ignored type.
///
/// Applies only when `type_` is `wiki-page` and the store configures at
/// least one ignored type; otherwise returns `None` without touching the
/// filesystem. Targets are checked in iteration order; a target whose type
/// cannot be determined is skipped.
pub fn derived_from_ignored_type<I, S>(
    store: &Store,
    type_: &str,
    derived_from_targets: I,
) -> Option<DerivedFromIgnored>
where
    I: IntoIterator<Item = S>,
    S: AsRef<str>,
{
    let ignored = &store.config.ignored_types;
    if type_ != "wiki-page" || ignored.is_empty() {
        return None;
    }
    derived_from_targets.into_iter().find_map(|candidate| {
        let target = candidate.as_ref();
        let target_type = link_target_type(store, target)?;
        if ignored.contains(&target_type) {
            Some(DerivedFromIgnored {
                target: target.to_string(),
                target_type,
            })
        } else {
            None
        }
    })
}
fn link_target_type(store: &Store, target: &str) -> Option<String> {
let bare = target.trim_end_matches(".md");
let abs = store.root.join(format!("{bare}.md"));
let text = std::fs::read_to_string(&abs).ok()?;
let (yaml, _, _) = split_frontmatter(&text)?;
let value: Value = serde_yml::from_str(&yaml).ok()?;
if let Value::Mapping(m) = value {
m.get(Value::String("type".into())).and_then(scalar_string)
} else {
None
}
}
/// Returns the date-valued frontmatter field names defined for a built-in
/// type; unknown types (and built-ins without date fields) yield an empty
/// slice.
fn canonical_date_fields(type_: &str) -> &'static [&'static str] {
    match type_ {
        "email" | "expense" | "meeting" => &["date"],
        "transcript" => &["recorded_at"],
        "pdf-source" => &["received_at"],
        "contact" => &["first_touch", "last_touch"],
        "invoice" => &["date", "paid_at"],
        _ => &[],
    }
}
/// Builds an order-independent key from the wiki links in the `attendees`
/// frontmatter block.
///
/// Each target has trailing `.md` removed and is lowercased; the distinct
/// results are sorted and joined with `,`. Returns `None` when no attendee
/// link is present.
fn meeting_attendees_key(p: &Parsed) -> Option<String> {
    let attendees: BTreeSet<String> = frontmatter_links_for_key(&p.fm_yaml, "attendees", 2)
        .into_iter()
        .map(|link| link.target.trim_end_matches(".md").to_lowercase())
        .filter(|name| !name.is_empty())
        .collect();
    if attendees.is_empty() {
        None
    } else {
        Some(attendees.into_iter().collect::<Vec<_>>().join(","))
    }
}
/// Reports whether `s` (ignoring surrounding whitespace) parses as an
/// RFC 3339 date-time.
fn is_iso8601(s: &str) -> bool {
    let candidate = s.trim();
    let parsed = DateTime::parse_from_rfc3339(candidate);
    parsed.is_ok()
}
/// Reports whether `s` (ignoring surrounding whitespace) is either an
/// RFC 3339 date-time or a plain `YYYY-MM-DD` date.
fn is_iso8601_date_or_datetime(s: &str) -> bool {
    let candidate = s.trim();
    DateTime::parse_from_rfc3339(candidate).is_ok()
        || chrono::NaiveDate::parse_from_str(candidate, "%Y-%m-%d").is_ok()
}
/// Shape check for an email address: `<local>@<domain>`.
///
/// After trimming, the value must:
/// - contain no whitespace of any kind (spaces, tabs, …);
/// - contain exactly one `@`, with a non-empty local part;
/// - have a domain with at least one `.` and no empty labels (so no leading,
///   trailing or doubled dots).
///
/// This is deliberately a shape check, not full RFC 5322 validation; quoted
/// local parts containing `@` or spaces are rejected.
fn is_email(s: &str) -> bool {
    let s = s.trim();
    if s.chars().any(char::is_whitespace) {
        return false;
    }
    let Some((local, domain)) = s.split_once('@') else {
        return false;
    };
    !local.is_empty()
        // A second `@` would otherwise end up inside the domain unnoticed.
        && !domain.contains('@')
        && domain.contains('.')
        && domain.split('.').all(|label| !label.is_empty())
}
/// Reports whether `s` looks like a currency amount.
///
/// Accepted shape: an optional leading symbol (`$`, `€`, `£`, `¥`), then an
/// optional three-letter ASCII code followed by whitespace, then an amount
/// that satisfies `is_plain_amount` once every `,` has been removed.
fn is_currency(s: &str) -> bool {
    let trimmed = s.trim();
    // At most one leading symbol is stripped; the first match wins.
    let after_symbol = ["$", "€", "£", "¥"]
        .iter()
        .find_map(|symbol| trimmed.strip_prefix(*symbol))
        .map_or(trimmed, |rest| rest.trim_start());
    let amount = match after_symbol.split_once(char::is_whitespace) {
        Some((code, rest))
            if code.len() == 3 && code.chars().all(|c| c.is_ascii_alphabetic()) =>
        {
            rest.trim_start()
        }
        _ => after_symbol,
    };
    let without_commas = amount.replace(',', "");
    is_plain_amount(without_commas.trim())
}
/// Reports whether `s` is a plain decimal amount: an optional `+`/`-` sign,
/// one or more ASCII digits, and optionally a `.` followed by one or two
/// ASCII digits.
fn is_plain_amount(s: &str) -> bool {
    let all_digits = |text: &str| text.bytes().all(|b| b.is_ascii_digit());

    let unsigned = s.strip_prefix(['+', '-']).unwrap_or(s);
    let mut pieces = unsigned.splitn(2, '.');
    let whole = pieces.next().unwrap_or("");
    if whole.is_empty() || !all_digits(whole) {
        return false;
    }
    match pieces.next() {
        None => true,
        Some(fraction) => matches!(fraction.len(), 1 | 2) && all_digits(fraction),
    }
}
/// Reports whether `s` (ignoring surrounding whitespace) is an `http://` or
/// `https://` URL with at least one character after the scheme.
///
/// The remainder is checked per scheme rather than against a single length
/// threshold, so short `http://` URLs such as `http://a` are accepted.
fn is_url(s: &str) -> bool {
    let s = s.trim();
    s.strip_prefix("http://")
        .or_else(|| s.strip_prefix("https://"))
        .map_or(false, |rest| !rest.is_empty())
}
/// Returns the fix-it hint shown when a field value does not match the
/// given schema shape.
fn shape_suggestion(shape: Shape) -> String {
    let hint: &'static str = match shape {
        Shape::String => "use a scalar string",
        Shape::Int => "use an integer",
        Shape::Bool => "use `true` or `false`",
        Shape::Date => "use an ISO-8601 date, e.g. 2026-05-27",
        Shape::Email => "use a `<local>@<domain>` address",
        Shape::Currency => "use a numeric amount, e.g. 1234.56",
        Shape::Url => "use an http(s) URL",
    };
    hint.to_string()
}
/// Builds the suggestion shown for a short-form wiki link: an example full
/// path under `records/contacts/` using the slugified link text.
///
/// Always returns `Some`.
fn short_form_suggestion(bare: &str) -> Option<String> {
    let slug = slugish(bare);
    Some(format!(
        "use a full store-relative path, e.g. [[records/contacts/{slug}]]"
    ))
}
/// Turns free text into a slug-like string: trimmed, lowercased, each
/// whitespace character replaced by `-`, and every character other than
/// alphanumerics, `-`, `/` and `_` removed.
fn slugish(s: &str) -> String {
    let lowered = s.trim().to_lowercase();
    let mut slug = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        let ch = if ch.is_whitespace() { '-' } else { ch };
        if ch.is_alphanumeric() || matches!(ch, '-' | '/' | '_') {
            slug.push(ch);
        }
    }
    slug
}
/// Appends one [`Issue`] built from the given parts to `issues`.
///
/// `file` is copied into an owned `PathBuf`; all other arguments are stored
/// as given.
// One parameter per `Issue` field keeps call sites flat, hence the allow.
#[allow(clippy::too_many_arguments)]
fn push(
    issues: &mut Vec<Issue>,
    severity: Severity,
    code: &'static str,
    file: &Path,
    line: Option<u32>,
    key: Option<String>,
    message: String,
    suggestion: Option<String>,
    related: Vec<PathBuf>,
) {
    let file = file.to_path_buf();
    let issue = Issue {
        severity,
        code,
        file,
        line,
        key,
        message,
        suggestion,
        related,
    };
    issues.push(issue);
}
/// Finds the file line on which the unindented key `key:` first appears in
/// the frontmatter text.
///
/// The result is the zero-based line index within `fm_yaml` plus 2, i.e. it
/// assumes the frontmatter text begins on line 2 of the file, right after
/// the opening `---`. Returns `None` when no line starts with `key`
/// immediately followed by `:`.
fn fm_key_line(fm_yaml: &str, key: &str) -> Option<u32> {
    fm_yaml
        .lines()
        .position(|line| {
            let after_key = line.trim_start().strip_prefix(key);
            line.starts_with(key) && matches!(after_key, Some(rest) if rest.starts_with(':'))
        })
        .map(|idx| idx as u32 + 2)
}
/// Like `fm_key_line`, but falls back to line 1 (the top of the frontmatter
/// block) when the key is absent. Always returns `Some`.
fn fm_key_line_or_top(fm_yaml: &str, key: &str) -> Option<u32> {
    let line = fm_key_line(fm_yaml, key).unwrap_or(1);
    Some(line)
}
/// Total ordering used to sort issues: by file, then line (`None` first),
/// then code, then key (`None` first).
fn issue_order(a: &Issue, b: &Issue) -> std::cmp::Ordering {
    // Tuples compare lexicographically, field by field, in this order.
    let lhs = (&a.file, a.line, a.code, &a.key);
    let rhs = (&b.file, b.line, b.code, &b.key);
    lhs.cmp(&rhs)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::Config;
use std::fs;
use tempfile::TempDir;
struct Fixture {
dir: TempDir,
config: Config,
}
impl Fixture {
fn new() -> Self {
let dir = TempDir::new().unwrap();
fs::write(
dir.path().join("DB.md"),
"---\ntype: db-md\nscope: company\nowner: Test\n---\n",
)
.unwrap();
for layer in ["sources", "records", "wiki"] {
fs::create_dir_all(dir.path().join(layer)).unwrap();
}
Fixture {
dir,
config: Config::default(),
}
}
fn bare() -> Self {
let dir = TempDir::new().unwrap();
Fixture {
dir,
config: Config::default(),
}
}
fn write(&self, rel: &str, contents: &str) {
let abs = self.dir.path().join(rel);
fs::create_dir_all(abs.parent().unwrap()).unwrap();
fs::write(abs, contents).unwrap();
}
fn store(&self) -> Store {
Store {
root: self.dir.path().to_path_buf(),
config: self.config.clone(),
}
}
fn store_all(&self) -> Vec<Issue> {
validate_all(&self.store()).unwrap()
}
fn rebuild_indexes(&self) {
crate::index::Index::rebuild_all(&self.store()).unwrap();
}
}
fn has(issues: &[Issue], code: &str) -> bool {
issues.iter().any(|i| i.code == code)
}
fn count(issues: &[Issue], code: &str) -> usize {
issues.iter().filter(|i| i.code == code).count()
}
fn find<'a>(issues: &'a [Issue], code: &str) -> &'a Issue {
issues
.iter()
.find(|i| i.code == code)
.unwrap_or_else(|| panic!("expected an issue with code {code}; got {issues:#?}"))
}
fn valid_contact(summary: &str) -> String {
format!(
"---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{summary}\"\nname: A\n---\n\n# A\n"
)
}
#[test]
fn not_a_store_when_db_md_absent() {
let fx = Fixture::bare();
let issues = fx.store_all();
assert_eq!(issues.len(), 1, "only NOT_A_STORE expected: {issues:#?}");
assert_eq!(issues[0].code, codes::NOT_A_STORE);
assert!(issues[0].is_error());
}
#[test]
fn working_set_also_reports_not_a_store() {
let fx = Fixture::bare();
let issues = validate_working_set(&fx.store(), None).unwrap();
assert!(has(&issues, codes::NOT_A_STORE));
}
#[test]
fn clean_store_has_no_issues() {
let fx = Fixture::new();
fx.write("records/contacts/a.md", &valid_contact("A contact"));
fx.rebuild_indexes();
let issues = fx.store_all();
assert!(
issues.is_empty(),
"expected a clean store, got: {issues:#?}"
);
}
#[test]
fn valid_db_md_emits_no_structure_issue() {
let fx = Fixture::new();
let issues = fx.store_all();
assert!(
!has(&issues, codes::DB_MD_BAD_TYPE)
&& !has(&issues, codes::DB_MD_MISSING_FIELD)
&& !has(&issues, codes::DB_MD_UNKNOWN_SECTION),
"a valid DB.md (type: db-md + scope + owner, recognized sections) is silent: {issues:#?}"
);
}
#[test]
fn db_md_wrong_type_is_error() {
let fx = Fixture::new();
fx.write("DB.md", "---\ntype: notes\nscope: company\nowner: T\n---\n");
let issues = fx.store_all();
let i = find(&issues, codes::DB_MD_BAD_TYPE);
assert!(i.is_error());
assert_eq!(i.file, PathBuf::from("DB.md"));
assert_eq!(i.key.as_deref(), Some("type"));
assert_eq!(i.line, Some(2), "anchors to the `type:` line");
}
#[test]
fn db_md_missing_scope_and_owner_each_report() {
let fx = Fixture::new();
fx.write("DB.md", "---\ntype: db-md\n---\n");
let issues = fx.store_all();
assert_eq!(
count(&issues, codes::DB_MD_MISSING_FIELD),
2,
"both scope and owner absent → two issues: {issues:#?}"
);
let keys: BTreeSet<Option<String>> = issues
.iter()
.filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
.map(|i| i.key.clone())
.collect();
assert_eq!(
keys,
BTreeSet::from([Some("scope".to_string()), Some("owner".to_string())]),
"one issue keyed on each missing field"
);
for i in issues
.iter()
.filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
{
assert!(i.is_error());
assert_eq!(i.line, Some(1), "absent field anchors to the block top");
}
}
#[test]
fn db_md_blank_required_field_is_missing() {
let fx = Fixture::new();
fx.write(
"DB.md",
"---\ntype: db-md\nscope: company\nowner: \"\"\n---\n",
);
let issues = fx.store_all();
let i = find(&issues, codes::DB_MD_MISSING_FIELD);
assert_eq!(i.key.as_deref(), Some("owner"));
assert_eq!(
i.line,
Some(4),
"a present-but-empty field anchors to its line"
);
assert!(
count(&issues, codes::DB_MD_MISSING_FIELD) == 1,
"scope is present and non-empty → only owner reported"
);
}
#[test]
fn db_md_unknown_section_is_warning() {
let fx = Fixture::new();
fx.write(
"DB.md",
"---\ntype: db-md\nscope: company\nowner: T\n---\n\n## Agent instructions\n\nbe good\n\n## Glossary\n\nterms\n",
);
let issues = fx.store_all();
let i = find(&issues, codes::DB_MD_UNKNOWN_SECTION);
assert!(!i.is_error(), "unknown section is a warning, not an error");
assert_eq!(i.severity, Severity::Warning);
assert_eq!(
i.line,
Some(11),
"anchors to the `## Glossary` heading line"
);
assert!(
i.message.contains("Glossary"),
"the message names the offending section: {}",
i.message
);
assert_eq!(
count(&issues, codes::DB_MD_UNKNOWN_SECTION),
1,
"only the unrecognized section is flagged: {issues:#?}"
);
}
#[test]
fn db_md_no_frontmatter_reports_type_and_both_fields() {
let fx = Fixture::new();
fx.write("DB.md", "# just a heading, no frontmatter\n");
let issues = fx.store_all();
assert!(has(&issues, codes::DB_MD_BAD_TYPE));
assert_eq!(count(&issues, codes::DB_MD_MISSING_FIELD), 2);
}
#[test]
fn contact_under_sources_is_layer_mismatch() {
let fx = Fixture::new();
fx.write(
"sources/misc/c.md",
&valid_contact("a contact in the wrong layer"),
);
let issues = fx.store_all();
let i = find(&issues, codes::LAYER_TYPE_MISMATCH);
assert!(!i.is_error(), "layer mismatch is a warning, not an error");
assert_eq!(i.severity, Severity::Warning);
assert_eq!(i.file, PathBuf::from("sources/misc/c.md"));
assert_eq!(i.key.as_deref(), Some("type"));
assert!(
i.message.contains("records") && i.message.contains("sources"),
"message names both the expected and actual layer: {}",
i.message
);
}
#[test]
fn email_under_wiki_is_layer_mismatch() {
let fx = Fixture::new();
fx.write(
"wiki/notes/e.md",
"---\ntype: email\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: misfiled email\n---\n\n# E\n",
);
let issues = fx.store_all();
let i = find(&issues, codes::LAYER_TYPE_MISMATCH);
assert_eq!(i.file, PathBuf::from("wiki/notes/e.md"));
}
#[test]
fn contact_under_records_is_not_flagged() {
let fx = Fixture::new();
fx.write("records/contacts/a.md", &valid_contact("correctly placed"));
let issues = fx.store_all();
assert!(
!has(&issues, codes::LAYER_TYPE_MISMATCH),
"a contact under records/ is correctly placed: {issues:#?}"
);
}
#[test]
fn custom_type_has_no_layer_expectation() {
let fx = Fixture::new();
fx.write(
"wiki/notes/p.md",
"---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a custom-typed note\n---\n\n# P\n",
);
let issues = fx.store_all();
assert!(
!has(&issues, codes::LAYER_TYPE_MISMATCH),
"a custom type is ambient context with no layer rule: {issues:#?}"
);
}
#[test]
fn wiki_page_layer_rule_both_directions() {
let fx = Fixture::new();
fx.write(
"wiki/topics/ok.md",
"---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: properly placed synthesis\n---\n\n# OK\n",
);
fx.write(
"records/topics/bad.md",
"---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: synthesis misfiled into records\n---\n\n# BAD\n",
);
let issues = fx.store_all();
let hits: Vec<&Issue> = issues
.iter()
.filter(|i| i.code == codes::LAYER_TYPE_MISMATCH)
.collect();
assert_eq!(hits.len(), 1, "only the misplaced one fires: {hits:#?}");
assert_eq!(hits[0].file, PathBuf::from("records/topics/bad.md"));
}
#[test]
fn layer_mismatch_fires_in_working_set_scope() {
let fx = Fixture::new();
fx.write(
"sources/misc/c.md",
&valid_contact("wrong layer, working set"),
);
fx.write(
"log.md",
"---\ntype: log\n---\n\n## [2026-05-22 10:00] create | sources/misc/c\nadded\n",
);
let issues = validate_working_set(&fx.store(), None).unwrap();
assert!(
has(&issues, codes::LAYER_TYPE_MISMATCH),
"the per-file layer check runs in the working-set scope too: {issues:#?}"
);
}
#[test]
fn missing_type_is_error() {
let fx = Fixture::new();
fx.write(
"records/contacts/a.md",
"---\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\n# A\n",
);
let issues = fx.store_all();
assert!(has(&issues, codes::FM_MISSING_TYPE));
assert!(find(&issues, codes::FM_MISSING_TYPE).is_error());
}
#[test]
fn content_file_with_no_frontmatter_block_reports_type_and_summary() {
let fx = Fixture::new();
fx.write(
"wiki/people/a.md",
"# Just a heading\n\nNo frontmatter here.\n",
);
let issues = fx.store_all();
assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
}
#[test]
fn content_file_with_empty_frontmatter_reports_type_and_summary() {
let fx = Fixture::new();
fx.write("wiki/people/a.md", "---\n---\n\nbody\n");
let issues = fx.store_all();
assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
}
#[test]
fn malformed_yaml_is_error_and_suppresses_field_checks() {
let fx = Fixture::new();
fx.write(
"records/contacts/a.md",
"---\ntype: contact\n bad: : : :\n: : nope\n---\n\nbody\n",
);
let issues = fx.store_all();
assert!(has(&issues, codes::FM_MALFORMED_YAML));
assert!(
!has(&issues, codes::SUMMARY_MISSING),
"malformed YAML should suppress SUMMARY_MISSING: {issues:#?}"
);
}
#[test]
fn bad_created_timestamp_is_error() {
let fx = Fixture::new();
fx.write(
"records/contacts/a.md",
"---\ntype: contact\ncreated: not-a-date\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
);
let issues = fx.store_all();
let issue = find(&issues, codes::FM_BAD_TIMESTAMP);
assert_eq!(issue.key.as_deref(), Some("created"));
assert!(issue.is_error());
}
#[test]
fn date_only_created_is_rejected_but_type_date_field_accepted() {
let fx = Fixture::new();
fx.write(
"records/contacts/a.md",
"---\ntype: contact\ncreated: 2026-05-22\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\nlast_touch: 2026-05-22\n---\n\n# A\n",
);
let issues = fx.store_all();
let created_issues: Vec<_> = issues
.iter()
.filter(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created"))
.collect();
assert_eq!(
created_issues.len(),
1,
"date-only `created` must fail: {issues:#?}"
);
assert!(
!issues.iter().any(
|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("last_touch")
),
"date-only `last_touch` is valid: {issues:#?}"
);
}
#[test]
fn summary_missing_empty_multiline_toolong() {
let fx = Fixture::new();
fx.write(
"wiki/people/missing.md",
"---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\n---\n\nbody\n",
);
fx.write(
"wiki/people/empty.md",
"---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \" \"\n---\n\nbody\n",
);
let long = "x".repeat(201);
fx.write(
"wiki/people/long.md",
&format!("---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{long}\"\n---\n\nbody\n"),
);
let issues = fx.store_all();
assert!(has(&issues, codes::SUMMARY_MISSING));
assert_eq!(
find(&issues, codes::SUMMARY_MISSING).file,
PathBuf::from("wiki/people/missing.md")
);
assert!(has(&issues, codes::SUMMARY_EMPTY));
assert!(has(&issues, codes::SUMMARY_TOO_LONG));
assert_eq!(
find(&issues, codes::SUMMARY_TOO_LONG).severity,
Severity::Warning
);
}
#[test]
fn summary_multiline_via_yaml_block_scalar() {
let fx = Fixture::new();
fx.write(
"wiki/people/a.md",
"---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: |\n line one\n line two\n---\n\nbody\n",
);
let issues = fx.store_all();
assert!(has(&issues, codes::SUMMARY_MULTILINE), "{issues:#?}");
}
#[test]
fn summary_exactly_200_chars_is_ok() {
let fx = Fixture::new();
let s = "y".repeat(200);
fx.write(
"wiki/people/a.md",
&format!("---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{s}\"\n---\n\nbody\n"),
);
let issues = fx.store_all();
assert!(
!has(&issues, codes::SUMMARY_TOO_LONG),
"200 is the bound, inclusive: {issues:#?}"
);
}
/// The index and log files written here have no `summary` frontmatter key;
/// validating the store must still not raise `SUMMARY_MISSING`.
#[test]
fn meta_files_need_no_summary() {
    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", &valid_contact("A contact"));

    // Meta files, written in this exact order after the one real record.
    let meta_files = [
        (
            "index.md",
            "---\ntype: index\nscope: root\n---\n\n# I\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n",
        ),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — A contact\n",
        ),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"A contact\"}\n",
        ),
        ("log.md", "---\ntype: log\n---\n\n# Log\n"),
    ];
    for (path, content) in meta_files {
        fixture.write(path, content);
    }

    let issues = fixture.store_all();
    let summary_missing = has(&issues, codes::SUMMARY_MISSING);
    assert!(!summary_missing, "{issues:#?}");
}
/// A `tags` list containing a nested list yields exactly one
/// `TAGS_MALFORMED` warning, on that file; a flat flow-form tag list yields none.
#[test]
fn nested_tags_warns_flat_tags_ok() {
    let nested = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags:\n - good\n - [nested, list]\n---\n\n# A\n";
    let flat = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags: [customer, vip]\n---\n\n# A\n";

    let fixture = Fixture::new();
    fixture.write("records/contacts/nested.md", nested);
    fixture.write("records/contacts/flat.md", flat);

    let issues = fixture.store_all();
    let mut malformed = Vec::new();
    for candidate in issues.iter() {
        if candidate.code == codes::TAGS_MALFORMED {
            malformed.push(candidate);
        }
    }

    assert_eq!(
        malformed.len(),
        1,
        "only the nested-tags file should warn: {issues:#?}"
    );
    let only = malformed[0];
    assert_eq!(only.file, PathBuf::from("records/contacts/nested.md"));
    assert_eq!(only.severity, Severity::Warning);
}
/// A body link without a folder path (`[[sarah-chen]]`) is a
/// `WIKI_LINK_SHORT_FORM` error, and no `WIKI_LINK_BROKEN` issue is raised for
/// the same target.
#[test]
fn short_form_wiki_link_is_error() {
    let page = valid_contact("links to a short form") + "\nSee [[sarah-chen]] for details.\n";
    let fixture = Fixture::new();
    fixture.write("wiki/people/a.md", &page);

    let issues = fixture.store_all();
    let short_form = find(&issues, codes::WIKI_LINK_SHORT_FORM);
    assert!(short_form.is_error());
    assert!(short_form.message.contains("sarah-chen"));

    let also_broken = issues
        .iter()
        .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("sarah-chen"));
    assert!(
        !also_broken,
        "short-form should suppress broken: {issues:#?}"
    );
}
/// A full-path link to a file that was never written is a `WIKI_LINK_BROKEN`
/// error whose message names the target.
#[test]
fn broken_full_path_wiki_link_is_error() {
    let page = valid_contact("links to a missing file") + "\nSee [[records/contacts/ghost]].\n";
    let fixture = Fixture::new();
    fixture.write("wiki/people/a.md", &page);

    let issues = fixture.store_all();
    let broken = find(&issues, codes::WIKI_LINK_BROKEN);
    assert!(broken.is_error());
    assert!(broken.message.contains("records/contacts/ghost"));
}
/// A full-path link to an existing record raises neither `WIKI_LINK_BROKEN`
/// nor `WIKI_LINK_SHORT_FORM`.
#[test]
fn valid_full_path_wiki_link_passes() {
    let fixture = Fixture::new();
    fixture.write("records/contacts/target.md", &valid_contact("target"));

    let linker = valid_contact("links to target") + "\nSee [[records/contacts/target]].\n";
    fixture.write("wiki/people/a.md", &linker);

    let issues = fixture.store_all();
    for unwanted in [codes::WIKI_LINK_BROKEN, codes::WIKI_LINK_SHORT_FORM] {
        assert!(!has(&issues, unwanted), "{issues:#?}");
    }
}
/// A link that spells out the `.md` extension produces a
/// `WIKI_LINK_HAS_EXTENSION` warning with a fix suggestion, and is not
/// reported as broken when the target exists.
#[test]
fn md_extension_wiki_link_warns_and_resolves() {
    let fixture = Fixture::new();
    fixture.write("records/contacts/target.md", &valid_contact("target"));

    let linker = valid_contact("links with extension") + "\nSee [[records/contacts/target.md]].\n";
    fixture.write("wiki/people/a.md", &linker);

    let issues = fixture.store_all();
    let with_ext = find(&issues, codes::WIKI_LINK_HAS_EXTENSION);
    assert_eq!(with_ext.severity, Severity::Warning);

    let suggestion = with_ext.suggestion.as_deref();
    assert_eq!(
        suggestion,
        Some("drop the extension: [[records/contacts/target]]")
    );

    let broken = has(&issues, codes::WIKI_LINK_BROKEN);
    assert!(!broken, "{issues:#?}");
}
/// A short-form link that appears only inside a fenced code block must not
/// raise `WIKI_LINK_SHORT_FORM`.
#[test]
fn wiki_links_in_code_fences_are_ignored() {
    let page = valid_contact("has a fenced example") + "\n```\n[[sarah-chen]]\n```\n";
    let fixture = Fixture::new();
    fixture.write("wiki/people/a.md", &page);

    let issues = fixture.store_all();
    let short_form = has(&issues, codes::WIKI_LINK_SHORT_FORM);
    assert!(
        !short_form,
        "fenced wiki-links must be ignored: {issues:#?}"
    );
}
/// A frontmatter list of wiki-links written in YAML flow form
/// (`[[[a]], [[b]]]`) is a `WIKI_LINK_FLOW_FORM_LIST` error keyed on the field.
#[test]
fn flow_form_link_list_in_frontmatter_is_error() {
    let meeting = "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees: [[[records/contacts/a]], [[records/contacts/b]]]\n---\n\n# M\n";
    let fixture = Fixture::new();
    fixture.write("records/meetings/m.md", meeting);

    let issues = fixture.store_all();
    let flow_form = find(&issues, codes::WIKI_LINK_FLOW_FORM_LIST);
    assert!(flow_form.is_error());
    assert_eq!(flow_form.key.as_deref(), Some("attendees"));
}
/// The same attendee links written as a YAML block list raise neither
/// `WIKI_LINK_FLOW_FORM_LIST` nor `WIKI_LINK_BROKEN` when both targets exist.
#[test]
fn block_form_link_list_in_frontmatter_is_not_flow_form() {
    let meeting = "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees:\n - [[records/contacts/a]]\n - [[records/contacts/b]]\n---\n\n# M\n";

    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", &valid_contact("a"));
    fixture.write("records/contacts/b.md", &valid_contact("b"));
    fixture.write("records/meetings/m.md", meeting);

    let issues = fixture.store_all();
    let flow_form = has(&issues, codes::WIKI_LINK_FLOW_FORM_LIST);
    assert!(!flow_form, "{issues:#?}");
    let broken = has(&issues, codes::WIKI_LINK_BROKEN);
    assert!(!broken, "{issues:#?}");
}
/// A quoted short-form link in a frontmatter field is a
/// `WIKI_LINK_SHORT_FORM` error whose `key` is that field's name.
#[test]
fn frontmatter_short_form_link_field_is_error() {
    let page = "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: \"[[sarah-chen]]\"\n---\n\n# A\n";
    let fixture = Fixture::new();
    fixture.write("wiki/people/a.md", page);

    let issues = fixture.store_all();
    let short_form = find(&issues, codes::WIKI_LINK_SHORT_FORM);
    assert!(short_form.is_error());
    assert_eq!(short_form.key.as_deref(), Some("related"));
}
/// Unquoted frontmatter links (`related: [[...]]`) are still recognized: the
/// short-form one raises `WIKI_LINK_SHORT_FORM` on its file and key, and the
/// full-path one to a missing file raises `WIKI_LINK_BROKEN` on its file.
#[test]
fn unquoted_frontmatter_link_is_recognized() {
    let short_page = "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[sarah-chen]]\n---\n\n# A\n";
    let broken_page = "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[records/contacts/ghost]]\n---\n\n# A\n";

    let fixture = Fixture::new();
    fixture.write("wiki/people/short.md", short_page);
    fixture.write("wiki/people/broken.md", broken_page);

    let issues = fixture.store_all();

    let short_form_caught = issues.iter().any(|i| {
        i.code == codes::WIKI_LINK_SHORT_FORM
            && i.file == *"wiki/people/short.md"
            && i.key.as_deref() == Some("related")
    });
    assert!(
        short_form_caught,
        "unquoted short-form frontmatter link must be caught: {issues:#?}"
    );

    let broken_caught = issues
        .iter()
        .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.file == *"wiki/people/broken.md");
    assert!(
        broken_caught,
        "unquoted full-path frontmatter link to a missing file must be caught: {issues:#?}"
    );
}
/// A short-form link in a contact's `company` field is reported as
/// `SCHEMA_LINK_PREFIX_MISMATCH` on that key, and no `WIKI_LINK_SHORT_FORM`
/// issue is raised for the same key.
#[test]
fn short_form_canonical_link_field_is_prefix_mismatch() {
    let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[northstar]]\"\n---\n\n# A\n";
    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", contact);

    let issues = fixture.store_all();
    let mismatch = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
    assert_eq!(mismatch.key.as_deref(), Some("company"));

    let double_reported = issues.iter().any(|i| {
        i.code == codes::WIKI_LINK_SHORT_FORM && i.key.as_deref() == Some("company")
    });
    assert!(
        !double_reported,
        "schema link fields are checked once, by the schema path: {issues:#?}"
    );
}
/// A plain string in a contact's `company` field is a
/// `SCHEMA_LINK_PREFIX_MISMATCH` error whose suggestion mentions
/// `records/companies/`.
#[test]
fn contact_company_plain_string_is_link_prefix_mismatch() {
    let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n";
    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", contact);

    let issues = fixture.store_all();
    let mismatch = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
    assert!(mismatch.is_error());
    assert_eq!(mismatch.key.as_deref(), Some("company"));

    let sugg = mismatch.suggestion.as_deref().unwrap();
    let names_prefix = sugg.contains("records/companies/");
    assert!(names_prefix, "suggestion should name the prefix: {sugg}");
}
/// A `company` link that resolves to an existing file under
/// `records/people/` is still reported as `SCHEMA_LINK_PREFIX_MISMATCH`
/// on the `company` key.
#[test]
fn contact_company_wrong_prefix_is_link_prefix_mismatch() {
    let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"[[records/people/acme]]\"\n---\n\n# Sarah\n";

    let fixture = Fixture::new();
    let wrong_target = valid_contact("acme as a person? wrong");
    fixture.write("records/people/acme.md", &wrong_target);
    fixture.write("records/contacts/a.md", contact);

    let issues = fixture.store_all();
    let mismatch = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
    assert_eq!(mismatch.key.as_deref(), Some("company"));
}
/// A `company` link pointing at an existing `records/companies/` record
/// raises no `SCHEMA_LINK_PREFIX_MISMATCH`.
#[test]
fn contact_company_correct_link_passes_schema() {
    let company = "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a company\nname: Acme\n---\n\n# Acme\n";
    let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Sarah\n";

    let fixture = Fixture::new();
    fixture.write("records/companies/acme.md", company);
    fixture.write("records/contacts/a.md", contact);

    let issues = fixture.store_all();
    let mismatch = has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
    assert!(!mismatch, "{issues:#?}");
}
/// With an explicit `contact` schema configured, a record that omits the
/// required `name`, has a non-email `email`, and an out-of-enum `status`
/// raises `SCHEMA_MISSING_REQUIRED`, `SCHEMA_SHAPE_MISMATCH`, and
/// `SCHEMA_ENUM_VIOLATION` on the respective keys.
#[test]
fn explicit_schema_required_shape_enum() {
    let name_field = FieldSpec {
        name: "name".into(),
        required: true,
        ..Default::default()
    };
    let email_field = FieldSpec {
        name: "email".into(),
        required: true,
        shape: Some(Shape::Email),
        ..Default::default()
    };
    let status_field = FieldSpec {
        name: "status".into(),
        enum_values: Some(vec!["active".into(), "inactive".into()]),
        ..Default::default()
    };
    let schema = Schema {
        fields: vec![name_field, email_field, status_field],
    };

    let mut configured = Fixture::new();
    configured.config.schemas.insert("contact".into(), schema);
    // Freeze the fixture once configuration is done.
    let fixture = configured;

    fixture.write(
        "records/contacts/a.md",
        "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nemail: not-an-email\nstatus: archived\n---\n\n# A\n",
    );

    let issues = fixture.store_all();
    // True when some issue carries both the given code and the given key.
    let reported = |code: &str, key: &str| {
        issues
            .iter()
            .any(|i| i.code == code && i.key.as_deref() == Some(key))
    };

    assert!(
        reported(codes::SCHEMA_MISSING_REQUIRED, "name"),
        "{issues:#?}"
    );
    assert!(
        reported(codes::SCHEMA_SHAPE_MISMATCH, "email"),
        "{issues:#?}"
    );
    assert!(
        reported(codes::SCHEMA_ENUM_VIOLATION, "status"),
        "{issues:#?}"
    );
}
/// When an explicit `contact` schema declares only `name`, a plain-string
/// `company` value no longer raises `SCHEMA_LINK_PREFIX_MISMATCH`.
#[test]
fn explicit_schema_overrides_implicit_canonical() {
    let only_name = Schema {
        fields: vec![FieldSpec {
            name: "name".into(),
            required: true,
            ..Default::default()
        }],
    };
    let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n";

    let mut fixture = Fixture::new();
    fixture.config.schemas.insert("contact".into(), only_name);
    fixture.write("records/contacts/a.md", contact);

    let issues = fixture.store_all();
    let mismatch = has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
    assert!(
        !mismatch,
        "explicit schema with no company link should override the implicit canonical one: {issues:#?}"
    );
}
/// Exercises the `Int`, `Url`, and `Currency` shapes through a custom
/// `widget` schema: every field of `bad.md` must raise
/// `SCHEMA_SHAPE_MISMATCH`, and `ok.md` must raise none.
#[test]
fn schema_shape_int_and_url_and_currency() {
    // Builds a field spec that only constrains the value's shape.
    let shaped = |field: &str, shape: Shape| FieldSpec {
        name: field.into(),
        shape: Some(shape),
        ..Default::default()
    };
    let widget_schema = Schema {
        fields: vec![
            shaped("qty", Shape::Int),
            shaped("site", Shape::Url),
            shaped("price", Shape::Currency),
        ],
    };

    let mut fixture = Fixture::new();
    fixture.config.schemas.insert("widget".into(), widget_schema);

    let ok_widget = "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nqty: 5\nsite: https://example.com\nprice: \"USD 1,234.50\"\n---\n\n# ok\n";
    let bad_widget = "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nqty: five\nsite: ftp://nope\nprice: inf\n---\n\n# bad\n";
    fixture.write("records/widgets/ok.md", ok_widget);
    fixture.write("records/widgets/bad.md", bad_widget);

    let issues = fixture.store_all();

    // Keys of every shape mismatch reported against the bad widget.
    let mut bad_keys: Vec<String> = Vec::new();
    for i in issues.iter() {
        if i.code == codes::SCHEMA_SHAPE_MISMATCH && i.file == *"records/widgets/bad.md" {
            bad_keys.push(i.key.clone().unwrap_or_default());
        }
    }
    let rejected = |key: &str| bad_keys.iter().any(|k| k == key);

    assert!(rejected("qty"), "{issues:#?}");
    assert!(rejected("site"), "{issues:#?}");
    assert!(
        rejected("price"),
        "inf must be rejected as currency: {issues:#?}"
    );

    let ok_flagged = issues
        .iter()
        .any(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH && i.file == *"records/widgets/ok.md");
    assert!(
        !ok_flagged,
        "valid shapes (incl. `USD 1,234.50`) must not fire: {issues:#?}"
    );
}
/// Table-driven check of `is_currency`: every entry of `accepted` must be
/// recognized, every entry of `rejected` must not.
#[test]
fn is_currency_accepts_codes_and_rejects_non_numeric() {
    let accepted = [
        "100",
        "1234.56",
        "$1,234.50",
        "USD 100",
        "usd 100",
        "EUR 9.50",
        "£12",
        "¥1000",
        "-5.00",
        "+5",
        "1,000,000",
    ];
    let rejected = [
        "inf",
        "-inf",
        "infinity",
        "NaN",
        "nan",
        "12.999",
        "1.2345",
        "USD",
        "$",
        "free",
        "",
        " ",
        "1e3",
        "1.",
        ".5",
        "1 000",
        "USDD 100",
    ];

    for ok in accepted {
        assert!(is_currency(ok), "expected currency: {ok:?}");
    }
    for bad in rejected {
        assert!(!is_currency(bad), "expected NOT currency: {bad:?}");
    }
}
/// A record whose type is listed in `ignored_types` produces a
/// `POLICY_IGNORED_TYPE_PRESENT` issue at `Info` severity (not an error).
#[test]
fn ignored_type_present_is_info() {
    let temp_record = "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n";

    let mut fixture = Fixture::new();
    fixture.config.ignored_types.push("temp".into());
    fixture.write("records/temps/x.md", temp_record);

    let issues = fixture.store_all();
    let present = find(&issues, codes::POLICY_IGNORED_TYPE_PRESENT);
    assert_eq!(present.severity, Severity::Info);
    assert!(!present.is_error());
}
/// A wiki page whose `derived_from` links to a record of an ignored type
/// raises a `POLICY_IGNORED_TYPE_DERIVED` warning keyed on `derived_from`.
#[test]
fn wiki_page_derived_from_ignored_type_warns() {
    let temp_record = "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n";
    let derived_page = "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: derived\nderived_from: \"[[records/temps/x]]\"\n---\n\n# t\n";

    let mut fixture = Fixture::new();
    fixture.config.ignored_types.push("temp".into());
    fixture.write("records/temps/x.md", temp_record);
    fixture.write("wiki/themes/t.md", derived_page);

    let issues = fixture.store_all();
    let derived = find(&issues, codes::POLICY_IGNORED_TYPE_DERIVED);
    assert_eq!(derived.severity, Severity::Warning);
    assert_eq!(derived.key.as_deref(), Some("derived_from"));
}
/// Calls `derived_from_ignored_type` directly and pins its decisions:
/// it matches a `wiki-page` deriving from an ignored-type record (also when
/// that target is not first), returns `None` for a non-wiki source type or a
/// non-ignored target, and returns `None` once `ignored_types` is empty.
#[test]
fn derived_from_ignored_type_is_the_shared_policy_decision() {
    const SECRET: &str = "records/secrets/s";
    const CONTACT: &str = "records/contacts/c";

    let mut fixture = Fixture::new();
    fixture.config.ignored_types.push("secret".into());
    fixture.write(
        "records/secrets/s.md",
        "---\ntype: secret\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: hush\n---\n\n# s\n",
    );
    fixture.write(
        "records/contacts/c.md",
        "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nname: C\n---\n\n# c\n",
    );

    let store = fixture.store();

    let direct = derived_from_ignored_type(&store, "wiki-page", std::iter::once(SECRET))
        .expect("wiki-page → ignored-type record must match");
    assert_eq!(direct.target, SECRET);
    assert_eq!(direct.target_type, "secret");

    let from_contact = derived_from_ignored_type(&store, "contact", std::iter::once(SECRET));
    assert_eq!(from_contact, None, "only wiki-page derivation is policed");

    let to_allowed = derived_from_ignored_type(&store, "wiki-page", std::iter::once(CONTACT));
    assert_eq!(
        to_allowed, None,
        "deriving from a non-ignored type is allowed"
    );

    let later = derived_from_ignored_type(&store, "wiki-page", [CONTACT, SECRET])
        .expect("a later ignored-type target must still be found");
    assert_eq!(later.target, SECRET);

    // Drop the policy and rebuild the store from the same files.
    fixture.config.ignored_types.clear();
    let store = fixture.store();
    let without_policy = derived_from_ignored_type(&store, "wiki-page", std::iter::once(SECRET));
    assert_eq!(
        without_policy, None,
        "an empty ignored-types policy short-circuits"
    );
}
/// Two contacts sharing `id: shared` yield a single `DUP_ID` error, reported
/// on `a.md` at the `id` line, with `b.md` listed as related.
#[test]
fn dup_id_is_hard_error_with_related() {
    let first = "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n";
    let second = "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n";

    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", first);
    fixture.write("records/contacts/b.md", second);

    let issues = fixture.store_all();
    let dup_count = count(&issues, codes::DUP_ID);
    assert_eq!(dup_count, 1, "one issue per group: {issues:#?}");

    let dup = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
    assert_eq!(dup.file, PathBuf::from("records/contacts/a.md"));
    assert!(dup.is_error());
    assert_eq!(dup.key.as_deref(), Some("id"));
    assert_eq!(
        dup.line,
        Some(3),
        "anchors to the `id` line on the reported file"
    );
    assert_eq!(dup.related, vec![PathBuf::from("records/contacts/b.md")]);
}
/// Even with both duplicate-id contacts named in the log, working-set
/// validation must not raise `DUP_ID`.
#[test]
fn dup_id_not_fired_in_working_set() {
    let fixture = Fixture::new();
    let files = [
        (
            "records/contacts/a.md",
            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
        ),
        (
            "records/contacts/b.md",
            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
        ),
        (
            "log.md",
            "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | records/contacts/a\nx\n\n## [2026-05-22 10:01] create | records/contacts/b\nx\n",
        ),
    ];
    for (path, content) in files {
        fixture.write(path, content);
    }

    let issues = validate_working_set(&fixture.store(), None).unwrap();
    let dup_id = has(&issues, codes::DUP_ID);
    assert!(!dup_id, "DUP_ID is --all only: {issues:#?}");
}
/// Two contacts with the same `email` yield one `DUP_CONTACT_EMAIL` warning
/// on `a.md`, keyed on `email`, with `b.md` as the related file.
#[test]
fn dup_contact_email_is_warning() {
    let fixture = Fixture::new();
    let contacts = [("a", "A"), ("b", "B")];
    for (f, name) in contacts {
        let path = format!("records/contacts/{f}.md");
        let content = format!("---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: s\nname: {name}\nemail: dup@x.com\n---\n\n# {name}\n");
        fixture.write(&path, &content);
    }

    let issues = fixture.store_all();
    assert_eq!(count(&issues, codes::DUP_CONTACT_EMAIL), 1);

    let collision = find(&issues, codes::DUP_CONTACT_EMAIL);
    assert_eq!(collision.severity, Severity::Warning);
    assert_eq!(collision.file, PathBuf::from("records/contacts/a.md"));
    assert_eq!(collision.key.as_deref(), Some("email"));
    assert_eq!(
        collision.related,
        vec![PathBuf::from("records/contacts/b.md")]
    );
}
/// Expenses `e1` and `e2` share date, amount, and vendor and collide as one
/// `DUP_EXPENSE_TUPLE` issue anchored at line 1 of `e1.md`; `e3` differs on
/// amount and must not appear among the related files.
#[test]
fn dup_expense_tuple_and_clean_when_one_field_differs() {
    // Renders an expense record; only the amount and the heading vary.
    fn exp(f: &str, amount: &str) -> String {
        format!(
            "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: e\ndate: 2026-05-01\namount: {amount}\nvendor: \"[[records/companies/acme]]\"\n---\n\n# {f}\n"
        )
    }

    let fixture = Fixture::new();
    fixture.write(
        "records/companies/acme.md",
        "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: c\nname: Acme\n---\n# A\n",
    );
    fixture.write("records/expenses/e1.md", &exp("e1", "100"));
    fixture.write("records/expenses/e2.md", &exp("e2", "100"));
    fixture.write("records/expenses/e3.md", &exp("e3", "200"));

    let issues = fixture.store_all();
    let collisions = count(&issues, codes::DUP_EXPENSE_TUPLE);
    assert_eq!(
        collisions, 1,
        "only e1+e2 collide, one issue: {issues:#?}"
    );

    let collision = find(&issues, codes::DUP_EXPENSE_TUPLE);
    assert_eq!(collision.file, PathBuf::from("records/expenses/e1.md"));
    assert_eq!(collision.line, Some(1), "tuple collision anchors to line 1");
    assert_eq!(
        collision.related,
        vec![PathBuf::from("records/expenses/e2.md")]
    );

    let e3_involved = issues.iter().any(|i| {
        i.code == codes::DUP_EXPENSE_TUPLE
            && i.related.contains(&PathBuf::from("records/expenses/e3.md"))
    });
    assert!(
        !e3_involved,
        "e3 differs on amount and must not collide: {issues:#?}"
    );
}
/// Two meetings on the same date whose attendee lists contain the same links
/// in opposite order collide as a single `DUP_MEETING_TUPLE` issue on `m1.md`.
#[test]
fn dup_meeting_tuple_is_attendee_set_order_independent() {
    // Renders a meeting record; `order` of "ab" lists a then b, anything else b then a.
    fn m(f: &str, order: &str) -> String {
        let attendees = match order {
            "ab" => " - [[records/contacts/a]]\n - [[records/contacts/b]]",
            _ => " - [[records/contacts/b]]\n - [[records/contacts/a]]",
        };
        format!(
            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nattendees:\n{attendees}\n---\n\n# {f}\n"
        )
    }

    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", &valid_contact("a"));
    fixture.write("records/contacts/b.md", &valid_contact("b"));
    fixture.write("records/meetings/m1.md", &m("m1", "ab"));
    fixture.write("records/meetings/m2.md", &m("m2", "ba"));

    let issues = fixture.store_all();
    let collisions = count(&issues, codes::DUP_MEETING_TUPLE);
    assert_eq!(
        collisions, 1,
        "same date + same attendee set (any order) collide as one issue: {issues:#?}"
    );

    let collision = find(&issues, codes::DUP_MEETING_TUPLE);
    assert_eq!(collision.file, PathBuf::from("records/meetings/m1.md"));
    assert_eq!(
        collision.related,
        vec![PathBuf::from("records/meetings/m2.md")]
    );
}
/// With a single contact and no index files at all, `INDEX_MISSING` is
/// reported for `index.md`, `records/index.md`, and `records/contacts`, and
/// `INDEX_JSONL_MISSING` is not reported.
#[test]
fn missing_indexes_at_all_three_levels() {
    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", &valid_contact("a"));

    let issues = fixture.store_all();
    let mut reported: BTreeSet<PathBuf> = BTreeSet::new();
    for i in issues.iter() {
        if i.code == codes::INDEX_MISSING {
            reported.insert(i.file.clone());
        }
    }

    for level in ["index.md", "records/index.md", "records/contacts"] {
        assert!(reported.contains(&PathBuf::from(level)), "{issues:#?}");
    }

    let jsonl_missing = has(&issues, codes::INDEX_JSONL_MISSING);
    assert!(!jsonl_missing, "{issues:#?}");
}
/// A type-folder index that lists a non-existent `ghost` and omits the
/// existing `present.md` raises an `INDEX_STALE_ENTRY` error naming `ghost`
/// and an `INDEX_MISSING_ENTRY` naming `present.md`.
#[test]
fn index_stale_entry_and_missing_entry() {
    let fixture = Fixture::new();
    fixture.write(
        "records/contacts/present.md",
        &valid_contact("present contact"),
    );

    let index_files = [
        (
            "index.md",
            "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n",
        ),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/ghost]] — gone\n",
        ),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/present.md\",\"type\":\"contact\",\"summary\":\"present contact\"}\n",
        ),
    ];
    for (path, content) in index_files {
        fixture.write(path, content);
    }

    let issues = fixture.store_all();

    let stale_entry = find(&issues, codes::INDEX_STALE_ENTRY);
    assert!(stale_entry.message.contains("ghost"));
    assert!(stale_entry.is_error());

    let missing_entry = find(&issues, codes::INDEX_MISSING_ENTRY);
    assert!(
        missing_entry.message.contains("present.md"),
        "{}",
        missing_entry.message
    );
}
/// An index entry whose text differs from the record's summary raises an
/// `INDEX_SUMMARY_MISMATCH` error with the record as the related file.
#[test]
fn index_summary_mismatch() {
    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", &valid_contact("the real summary"));

    let index_files = [
        (
            "index.md",
            "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n",
        ),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a STALE summary\n",
        ),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"the real summary\"}\n",
        ),
    ];
    for (path, content) in index_files {
        fixture.write(path, content);
    }

    let issues = fixture.store_all();
    let mismatch = find(&issues, codes::INDEX_SUMMARY_MISMATCH);
    assert!(mismatch.is_error());
    assert_eq!(
        mismatch.related,
        vec![PathBuf::from("records/contacts/a.md")]
    );
}
/// An index entry whose text equals the record's summary raises no
/// `INDEX_SUMMARY_MISMATCH`.
#[test]
fn index_summary_match_passes() {
    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", &valid_contact("matching summary"));

    let index_files = [
        (
            "index.md",
            "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n",
        ),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — matching summary\n",
        ),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"matching summary\"}\n",
        ),
    ];
    for (path, content) in index_files {
        fixture.write(path, content);
    }

    let issues = fixture.store_all();
    let mismatch = has(&issues, codes::INDEX_SUMMARY_MISMATCH);
    assert!(!mismatch, "{issues:#?}");
}
/// An index entry carrying a trailing ` · #customer` tag suffix after the
/// summary text must not raise `INDEX_SUMMARY_MISMATCH`.
#[test]
fn index_entry_with_tag_suffix_matches_summary() {
    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", &valid_contact("clean summary"));

    let index_files = [
        (
            "index.md",
            "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n",
        ),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — clean summary · #customer\n",
        ),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"clean summary\"}\n",
        ),
    ];
    for (path, content) in index_files {
        fixture.write(path, content);
    }

    let issues = fixture.store_all();
    let mismatch = has(&issues, codes::INDEX_SUMMARY_MISMATCH);
    assert!(!mismatch, "tag suffix should be stripped: {issues:#?}");
}
/// When the `index.jsonl` sidecar lists only `a.md` while `b.md` also exists,
/// an `INDEX_JSONL_DESYNC` issue naming `b.md` is raised.
#[test]
fn index_jsonl_desync_missing_file_in_jsonl() {
    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", &valid_contact("a"));
    fixture.write("records/contacts/b.md", &valid_contact("b"));

    let index_files = [
        (
            "index.md",
            "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (2 files)\n",
        ),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n- [[records/contacts/b]] — b\n",
        ),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
        ),
    ];
    for (path, content) in index_files {
        fixture.write(path, content);
    }

    let issues = fixture.store_all();
    let out_of_sync = find(&issues, codes::INDEX_JSONL_DESYNC);
    assert!(
        out_of_sync.message.contains("b.md"),
        "{}",
        out_of_sync.message
    );
}
/// When the `index.jsonl` sidecar contains a row for `ghost.md`, which does
/// not exist, an `INDEX_JSONL_DESYNC` issue naming `ghost.md` is raised.
#[test]
fn index_jsonl_desync_record_points_at_missing_file() {
    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", &valid_contact("a"));

    let index_files = [
        (
            "index.md",
            "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n",
        ),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
        ),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
        ),
    ];
    for (path, content) in index_files {
        fixture.write(path, content);
    }

    let issues = fixture.store_all();
    let ghost_reported = issues
        .iter()
        .any(|i| i.code == codes::INDEX_JSONL_DESYNC && i.message.contains("ghost.md"));
    assert!(ghost_reported, "{issues:#?}");
}
/// A sidecar row whose `summary` differs from the record's raises
/// `INDEX_JSONL_STALE`, related to the record, with `summary` in its key.
#[test]
fn index_jsonl_stale_summary() {
    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", &valid_contact("real summary"));

    let index_files = [
        (
            "index.md",
            "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n",
        ),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — real summary\n",
        ),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"OUTDATED\"}\n",
        ),
    ];
    for (path, content) in index_files {
        fixture.write(path, content);
    }

    let issues = fixture.store_all();
    let outdated = find(&issues, codes::INDEX_JSONL_STALE);
    assert_eq!(
        outdated.related,
        vec![PathBuf::from("records/contacts/a.md")]
    );
    assert!(outdated.key.as_deref().unwrap().contains("summary"));
}
/// After rebuilding indexes the sidecar is not stale and contains the
/// contact's email; corrupting only that email in the sidecar raises
/// `INDEX_JSONL_STALE` whose key mentions `email` but neither `summary`
/// nor `type`.
#[test]
fn index_jsonl_stale_queryable_field_email() {
    let fixture = Fixture::new();
    fixture.write(
        "records/contacts/a.md",
        "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"a contact\"\nname: A\nemail: real@correct.com\n---\n\n# A\n",
    );
    fixture.rebuild_indexes();

    let sidecar = fixture.dir.path().join("records/contacts/index.jsonl");
    let good = fs::read_to_string(&sidecar).unwrap();
    assert!(
        !has(&fixture.store_all(), codes::INDEX_JSONL_STALE),
        "freshly-rebuilt sidecar must not be stale"
    );
    assert!(
        good.contains("real@correct.com"),
        "sidecar projects email: {good}"
    );

    // Overwrite the sidecar with a copy in which only the email differs.
    let tampered = good.replace("real@correct.com", "STALE-WRONG@evil.com");
    fixture.write("records/contacts/index.jsonl", &tampered);

    let issues = fixture.store_all();
    let outdated = find(&issues, codes::INDEX_JSONL_STALE);
    assert_eq!(
        outdated.related,
        vec![PathBuf::from("records/contacts/a.md")]
    );

    let key = outdated.key.as_deref().unwrap();
    assert!(
        key.contains("email"),
        "expected `email` in stale key, got {key:?}"
    );
    assert!(!key.contains("summary"), "summary still matches: {key:?}");
    assert!(!key.contains("type"), "type still matches: {key:?}");
}
/// After rebuilding indexes for an expense, altering a tag, the `updated`
/// timestamp, and the amount in the sidecar raises `INDEX_JSONL_STALE` whose
/// key mentions `amount`, `tags`, and `updated`.
#[test]
fn index_jsonl_stale_typed_and_list_fields() {
    let fixture = Fixture::new();
    fixture.write(
        "records/expenses/e.md",
        "---\ntype: expense\ncreated: 2026-05-20T08:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"office chairs\"\ntags: [furniture, q2]\namount: 1299\nvendor: Acme\ndate: 2026-05-20\n---\n\n# Expense\n",
    );
    fixture.rebuild_indexes();

    let sidecar = fixture.dir.path().join("records/expenses/index.jsonl");
    let fresh = fs::read_to_string(&sidecar).unwrap();
    assert!(
        !has(&fixture.store_all(), codes::INDEX_JSONL_STALE),
        "freshly-rebuilt sidecar must not be stale"
    );

    // Apply the three substitutions one after another, in this order.
    let tampered = fresh.replace("\"q2\"", "\"WRONG-TAG\"");
    let tampered = tampered.replace("2026-05-22T10:00:00-07:00", "2099-01-01T00:00:00-07:00");
    let tampered = tampered.replace("1299", "9999");
    fixture.write("records/expenses/index.jsonl", &tampered);

    let issues = fixture.store_all();
    let outdated = find(&issues, codes::INDEX_JSONL_STALE);
    let key = outdated.key.as_deref().unwrap();
    for expected in ["amount", "tags", "updated"] {
        assert!(
            key.contains(expected),
            "expected `{expected}` in stale key, got {key:?}"
        );
    }
}
/// An `index.md` placed in `records/contacts/subfolder/` is reported as an
/// `INDEX_ORPHAN` warning on that file.
#[test]
fn index_orphan_in_noncanonical_folder() {
    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", &valid_contact("a"));

    let index_files = [
        (
            "index.md",
            "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n",
        ),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
        ),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
        ),
        (
            "records/contacts/subfolder/index.md",
            "---\ntype: index\nscope: type-folder\n---\n\n# stray\n",
        ),
    ];
    for (path, content) in index_files {
        fixture.write(path, content);
    }

    let issues = fixture.store_all();
    let stray = find(&issues, codes::INDEX_ORPHAN);
    assert_eq!(stray.severity, Severity::Warning);
    assert_eq!(
        stray.file,
        PathBuf::from("records/contacts/subfolder/index.md")
    );
}
/// A root `index.md` declaring `scope: layer` is reported as an
/// `INDEX_WRONG_SCOPE` warning on `index.md`.
#[test]
fn index_wrong_scope() {
    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", &valid_contact("a"));

    let index_files = [
        (
            "index.md",
            "---\ntype: index\nscope: layer\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n",
        ),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
        ),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
        ),
    ];
    for (path, content) in index_files {
        fixture.write(path, content);
    }

    let issues = fixture.store_all();
    let wrong_scope = find(&issues, codes::INDEX_WRONG_SCOPE);
    assert_eq!(wrong_scope.severity, Severity::Warning);
    assert_eq!(wrong_scope.file, PathBuf::from("index.md"));
}
/// With 501 contacts, a type-folder index that lists only one entry plus a
/// "More" section raises no `INDEX_MISSING_ENTRY`, and a complete sidecar
/// raises no `INDEX_JSONL_DESYNC`.
#[test]
fn capped_type_folder_index_does_not_flag_missing_entries() {
    let fixture = Fixture::new();
    for i in 0..501 {
        let path = format!("records/contacts/c{i:04}.md");
        let summary = format!("contact {i}");
        fixture.write(&path, &valid_contact(&summary));
    }

    let index_files = [
        (
            "index.md",
            "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (501 files)\n",
        ),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/c0000]] — contact 0\n\n## More\n\nThis folder has 501 files.\n",
        ),
    ];
    for (path, content) in index_files {
        fixture.write(path, content);
    }

    // One sidecar row per contact, concatenated in ascending order.
    let sidecar: String = (0..501)
        .map(|i| {
            format!(
                "{{\"path\":\"records/contacts/c{i:04}.md\",\"type\":\"contact\",\"summary\":\"contact {i}\"}}\n"
            )
        })
        .collect();
    fixture.write("records/contacts/index.jsonl", &sidecar);

    let issues = fixture.store_all();

    let missing_entry = has(&issues, codes::INDEX_MISSING_ENTRY);
    assert!(
        !missing_entry,
        "over the cap, missing browse entries are expected: {issues:#?}"
    );

    let desync = has(&issues, codes::INDEX_JSONL_DESYNC);
    assert!(
        !desync,
        "{:#?}",
        issues
            .iter()
            .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
            .collect::<Vec<_>>()
    );
}
/// One log file exercising three checks: an entry timestamped earlier than
/// its predecessor (`LOG_OUT_OF_ORDER`, warning), an entry of kind
/// `frobnicate` (`LOG_UNKNOWN_KIND`, warning), and an entry whose timestamp is
/// `not-a-date` (`LOG_BAD_TIMESTAMP`, error).
#[test]
fn log_bad_timestamp_unknown_kind_out_of_order() {
    let log = concat!(
        "---\ntype: log\n---\n\n# Log\n\n",
        "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
        "## [2026-05-27 09:00] update | records/contacts/b\nx\n\n",
        "## [2026-05-27 11:00] frobnicate | records/contacts/c\nx\n\n",
        "## [not-a-date] create | records/contacts/d\nx\n",
    );
    let fixture = Fixture::new();
    fixture.write("log.md", log);

    let issues = fixture.store_all();

    assert!(has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
    let out_of_order = find(&issues, codes::LOG_OUT_OF_ORDER);
    assert_eq!(out_of_order.severity, Severity::Warning);

    let unknown_kind = find(&issues, codes::LOG_UNKNOWN_KIND);
    assert_eq!(unknown_kind.severity, Severity::Warning);
    assert!(unknown_kind.message.contains("frobnicate"));

    let bad_timestamp = find(&issues, codes::LOG_BAD_TIMESTAMP);
    assert!(bad_timestamp.is_error());
}
/// A `validate` log heading with no `| object` part raises neither
/// `LOG_BAD_TIMESTAMP` nor `LOG_UNKNOWN_KIND`.
#[test]
fn log_validate_entry_without_object_is_well_formed() {
    let log = "---\ntype: log\n---\n\n## [2026-05-27 10:00] validate\nPASS\n";
    let fixture = Fixture::new();
    fixture.write("log.md", log);

    let issues = fixture.store_all();
    for unwanted in [codes::LOG_BAD_TIMESTAMP, codes::LOG_UNKNOWN_KIND] {
        assert!(!has(&issues, unwanted), "{issues:#?}");
    }
}
/// Two log entries with ascending timestamps raise no `LOG_OUT_OF_ORDER`.
#[test]
fn log_in_order_is_clean() {
    let log = concat!(
        "---\ntype: log\n---\n\n",
        "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
        "## [2026-05-27 10:05] update | records/contacts/a\nx\n",
    );
    let fixture = Fixture::new();
    fixture.write("log.md", log);

    let issues = fixture.store_all();
    let out_of_order = has(&issues, codes::LOG_OUT_OF_ORDER);
    assert!(!out_of_order, "{issues:#?}");
}
/// A log with descending timestamps does not raise `LOG_OUT_OF_ORDER` when
/// validated through `validate_working_set`.
#[test]
fn log_not_checked_in_working_set() {
    let log = concat!(
        "---\ntype: log\n---\n\n",
        "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
        "## [2026-05-27 09:00] update | records/contacts/a\nx\n",
    );
    let fixture = Fixture::new();
    fixture.write("log.md", log);

    let issues = validate_working_set(&fixture.store(), None).unwrap();
    let out_of_order = has(&issues, codes::LOG_OUT_OF_ORDER);
    assert!(!out_of_order, "log ordering is --all only: {issues:#?}");
}
/// Two contacts carry an invalid `created` value, but only `dirty` is
/// mentioned in the log. Working-set validation must report
/// `FM_BAD_TIMESTAMP` for `dirty.md` and must not report anything at all for
/// `unlogged.md`.
#[test]
fn working_set_validates_only_changed_files() {
    let dirty_contact = "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n";
    let unlogged_contact = "---\ntype: contact\ncreated: ALSO-BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n";
    let log_body =
        "---\ntype: log\n---\n\n## [2026-05-22 10:00] update | records/contacts/dirty\nedited\n";

    let fx = Fixture::new();
    fx.write("records/contacts/dirty.md", dirty_contact);
    fx.write("records/contacts/unlogged.md", unlogged_contact);
    fx.write("log.md", log_body);

    let issues = validate_working_set(&fx.store(), None).unwrap();

    // The logged file is validated and its bad timestamp is caught.
    let dirty_flagged = issues.iter().any(|issue| {
        issue.code == codes::FM_BAD_TIMESTAMP && issue.file == *"records/contacts/dirty.md"
    });
    assert!(dirty_flagged, "{issues:#?}");

    // The file the log never mentions contributes no issue of any kind.
    let unlogged_mentioned = issues
        .iter()
        .any(|issue| issue.file == *"records/contacts/unlogged.md");
    assert!(
        !unlogged_mentioned,
        "unlogged file must not be in the working set: {issues:#?}"
    );
}
/// The log records a `delete` of `records/contacts/changed`, and a wiki page
/// still links to that path. Working-set validation must report
/// `WIKI_LINK_BROKEN` on the linking page even though the page itself is not
/// mentioned in the log.
#[test]
fn working_set_includes_incoming_linkers_to_changed_path() {
    let linker_page = "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: links to a removed page\n---\n\nSee [[records/contacts/changed]].\n";
    let log_body =
        "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/changed\nremoved\n";

    let fx = Fixture::new();
    fx.write("wiki/people/linker.md", linker_page);
    fx.write("log.md", log_body);

    let issues = validate_working_set(&fx.store(), None).unwrap();

    let linker_flagged = issues.iter().any(|issue| {
        issue.code == codes::WIKI_LINK_BROKEN && issue.file == *"wiki/people/linker.md"
    });
    assert!(
        linker_flagged,
        "incoming linker to a removed path must be validated: {issues:#?}"
    );
}
/// With an explicit `since` of 2026-05-22, a file whose log entry is dated
/// 2026-05-25 must show up in the issues, while a file whose log entry is
/// dated 2026-05-20 must not.
#[test]
fn working_set_respects_explicit_since_cutoff() {
    let old_contact = "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n";
    let new_contact = "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n";
    let log_body = concat!(
        "---\ntype: log\n---\n\n",
        "## [2026-05-20 10:00] update | records/contacts/old\nx\n\n",
        "## [2026-05-25 10:00] update | records/contacts/new\nx\n",
    );

    let fx = Fixture::new();
    fx.write("records/contacts/old.md", old_contact);
    fx.write("records/contacts/new.md", new_contact);
    fx.write("log.md", log_body);

    // The cutoff sits between the two log entries.
    let since = DateTime::parse_from_rfc3339("2026-05-22T00:00:00+00:00").unwrap();
    let issues = validate_working_set(&fx.store(), Some(since)).unwrap();

    let new_reported = issues
        .iter()
        .any(|issue| issue.file == *"records/contacts/new.md");
    assert!(new_reported, "{issues:#?}");

    let old_reported = issues
        .iter()
        .any(|issue| issue.file == *"records/contacts/old.md");
    assert!(
        !old_reported,
        "old change is before the cutoff: {issues:#?}"
    );
}
/// With no explicit `since`, a file changed after the log's `validate` entry
/// must be reported and a file changed before that entry must not.
#[test]
fn working_set_default_since_is_last_validate_entry() {
    let before_contact = "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n";
    let after_contact = "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n";
    // update(before) -> validate -> update(after)
    let log_body = concat!(
        "---\ntype: log\n---\n\n",
        "## [2026-05-20 10:00] update | records/contacts/before\nx\n\n",
        "## [2026-05-21 10:00] validate\nPASS\n\n",
        "## [2026-05-22 10:00] update | records/contacts/after\nx\n",
    );

    let fx = Fixture::new();
    fx.write("records/contacts/before.md", before_contact);
    fx.write("records/contacts/after.md", after_contact);
    fx.write("log.md", log_body);

    let issues = validate_working_set(&fx.store(), None).unwrap();

    let after_reported = issues
        .iter()
        .any(|issue| issue.file == *"records/contacts/after.md");
    assert!(after_reported, "{issues:#?}");

    let before_reported = issues
        .iter()
        .any(|issue| issue.file == *"records/contacts/before.md");
    assert!(
        !before_reported,
        "change before the last validate entry is outside the default window: {issues:#?}"
    );
}
/// Issues from a full-store validation must come back in ascending file
/// order.
///
/// Two wiki pages with an invalid `created` value are written in reverse
/// alphabetical order (`z.md` before `a.md`). The test first checks that both
/// pages actually appear in the issue list, because an empty or single-file
/// list is trivially "sorted" and would let the ordering assertion pass
/// without testing anything. It then compares the emitted file sequence with
/// its sorted copy.
///
/// NOTE(review): despite the name, only the file component of the ordering is
/// asserted here; the within-file line order is not checked.
#[test]
fn issues_are_sorted_by_file_then_line() {
    let page = "---\ntype: wiki-page\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n";
    let fx = Fixture::new();
    fx.write("wiki/people/z.md", page);
    fx.write("wiki/people/a.md", page);

    let issues = fx.store_all();

    // Guard against a vacuous pass: both fixture pages must have produced at
    // least one issue, otherwise there is no cross-file order to verify.
    for expected in ["wiki/people/a.md", "wiki/people/z.md"] {
        assert!(
            issues.iter().any(|i| i.file == *expected),
            "expected at least one issue for {expected}; without it the ordering check is vacuous: {issues:#?}"
        );
    }

    let files: Vec<&PathBuf> = issues.iter().map(|i| &i.file).collect();
    let mut sorted = files.clone();
    sorted.sort();
    assert_eq!(
        files, sorted,
        "issues must be emitted in a stable file order"
    );
}
/// A page listed in the fixture config's `frozen_pages` must not make
/// full-store validation emit `POLICY_FROZEN_PAGE`; per the assertion
/// message, frozen pages are enforced at write time rather than by validate.
#[test]
fn frozen_page_is_not_a_validate_error() {
    let frozen_path = "records/decisions/d.md";
    let decision = "---\ntype: decision\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a finalized decision\n---\n\n# D\n";

    let mut fx = Fixture::new();
    fx.config.frozen_pages.push(PathBuf::from(frozen_path));
    fx.write(frozen_path, decision);

    let issues = fx.store_all();

    let frozen_reported = has(&issues, codes::POLICY_FROZEN_PAGE);
    assert!(
        !frozen_reported,
        "frozen pages are enforced at write-time, not by validate: {issues:#?}"
    );
}
/// A page that links to an existing record by its full path must not cause
/// `WIKI_LINK_AMBIGUOUS` to be emitted under full-store validation.
#[test]
fn wiki_link_ambiguous_is_never_emitted_under_full_path_doctrine() {
    let fx = Fixture::new();

    // The link target.
    fx.write("records/contacts/sarah-chen.md", &valid_contact("sarah"));

    // The linking page: a generated contact body followed by a full-path link.
    let linking_page =
        valid_contact("links to sarah") + "\nSee [[records/contacts/sarah-chen]].\n";
    fx.write("wiki/people/p.md", &linking_page);

    let issues = fx.store_all();

    let ambiguous_reported = has(&issues, codes::WIKI_LINK_AMBIGUOUS);
    assert!(!ambiguous_reported, "{issues:#?}");
}
/// A record of type `proposal` with extra fields (`custom_field`, `budget`)
/// must not trigger `FM_MISSING_TYPE`, `SCHEMA_MISSING_REQUIRED` or
/// `SCHEMA_SHAPE_MISMATCH`, and no issue may be keyed on either extra field.
#[test]
fn unknown_type_passes_through() {
    let proposal = "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a proposal\ncustom_field: anything\nbudget: 5000\n---\n\n# Proposal\n";
    let fx = Fixture::new();
    fx.write("records/proposals/x.md", proposal);

    let issues = fx.store_all();

    let forbidden_codes = [
        codes::FM_MISSING_TYPE,
        codes::SCHEMA_MISSING_REQUIRED,
        codes::SCHEMA_SHAPE_MISMATCH,
    ];
    for code in forbidden_codes {
        assert!(!has(&issues, code), "{issues:#?}");
    }

    // Neither of the free-form fields may be the subject of any issue.
    let extra_field_flagged = issues
        .iter()
        .any(|issue| matches!(issue.key.as_deref(), Some("custom_field" | "budget")));
    assert!(
        !extra_field_flagged,
        "unknown fields are ambient context: {issues:#?}"
    );
}
/// An expense whose `vendor` is a plain quoted string must produce
/// `SCHEMA_LINK_PREFIX_MISMATCH` keyed on `vendor`, with a suggestion that
/// mentions `records/companies/`.
#[test]
fn expense_vendor_plain_string_is_link_prefix_mismatch() {
    let expense = "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: an expense\ndate: 2026-05-01\namount: 100\nvendor: \"Acme Co\"\n---\n\n# E\n";
    let fx = Fixture::new();
    fx.write("records/expenses/e.md", expense);

    let issues = fx.store_all();
    let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);

    let flagged_key = issue.key.as_deref();
    assert_eq!(flagged_key, Some("vendor"));
    // The suggestion must point the author at the expected link prefix.
    assert!(issue
        .suggestion
        .as_deref()
        .unwrap()
        .contains("records/companies/"));
}
/// An invoice whose `vendor` is an unquoted wiki link to an existing company
/// must produce neither `SCHEMA_LINK_PREFIX_MISMATCH` nor `WIKI_LINK_BROKEN`.
#[test]
fn invoice_vendor_correct_unquoted_link_passes() {
    let company = "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a company\nname: Acme\n---\n\n# Acme\n";
    let invoice = "---\ntype: invoice\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: an invoice\ndate: 2026-05-01\namount: 100\nvendor: [[records/companies/acme]]\n---\n\n# I\n";

    let fx = Fixture::new();
    fx.write("records/companies/acme.md", company);
    fx.write("records/invoices/i.md", invoice);

    let issues = fx.store_all();

    let prefix_mismatch = has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
    assert!(
        !prefix_mismatch,
        "a correct unquoted vendor link must pass: {issues:#?}"
    );

    let broken_link = has(&issues, codes::WIKI_LINK_BROKEN);
    assert!(!broken_link, "{issues:#?}");
}
/// Pins the exact set of implicit link fields exposed by
/// `implicit_canonical_schema`.
///
/// Every `(type, field, prefix)` row in the expected table must resolve to
/// that link prefix; the total number of fields across the four schema-bearing
/// types must equal the table length (so no extra field is enforced); and
/// `wiki-page`, `company` and `decision` must have no implicit schema at all.
#[test]
fn implicit_canonical_schema_matches_spec_link_set_exactly() {
    // Looks up the link prefix declared for `field` on `type_`, as a string.
    let prefix_of = |type_: &str, field: &str| -> Option<String> {
        let schema = implicit_canonical_schema(type_)?;
        let spec = schema.fields.into_iter().find(|f| f.name == field)?;
        let link_prefix = spec.link_prefix?;
        Some(link_prefix.to_string_lossy().into_owned())
    };

    let expected: &[(&str, &str, &str)] = &[
        ("contact", "company", "records/companies/"),
        ("expense", "vendor", "records/companies/"),
        ("expense", "contact", "records/contacts/"),
        ("meeting", "expense", "records/expenses/"),
        ("invoice", "vendor", "records/companies/"),
    ];

    for &(type_, field, prefix) in expected {
        let actual = prefix_of(type_, field);
        assert_eq!(
            actual.as_deref(),
            Some(prefix),
            "{type_}.{field} must be an implicit link to {prefix}"
        );
    }

    // Count every field the implicit schemas declare; it must match the table.
    let mut total: usize = 0;
    for type_ in ["contact", "expense", "meeting", "invoice"] {
        if let Some(schema) = implicit_canonical_schema(type_) {
            total += schema.fields.len();
        }
    }
    assert_eq!(total, expected.len(), "no unmarked field may be enforced");

    assert!(
        implicit_canonical_schema("wiki-page").is_none(),
        "wiki-page.derived_from has no single canonical prefix; it must not be implicit-schema enforced"
    );
    assert!(implicit_canonical_schema("company").is_none());
    assert!(implicit_canonical_schema("decision").is_none());
}
/// A wiki page whose `derived_from` is a plain quoted string must not be
/// reported as `SCHEMA_LINK_PREFIX_MISMATCH`.
#[test]
fn wiki_page_derived_from_plain_string_is_not_prefix_mismatch() {
    let theme_page = "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a theme\ntopic: renewals\nderived_from: \"some notes\"\n---\n\n# T\n";
    let fx = Fixture::new();
    fx.write("wiki/themes/t.md", theme_page);

    let issues = fx.store_all();

    let prefix_mismatch = has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
    assert!(
        !prefix_mismatch,
        "wiki-page.derived_from is not implicit-schema enforced: {issues:#?}"
    );
}
/// `expense.contact` and `meeting.expense` given as plain quoted strings must
/// each produce a `SCHEMA_LINK_PREFIX_MISMATCH` on their own file and key,
/// with suggestions mentioning `records/contacts/` and `records/expenses/`
/// respectively.
#[test]
fn expense_contact_and_meeting_expense_enforce_their_prefixes() {
    let expense = "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: an expense\ndate: 2026-05-01\namount: 100\nvendor: [[records/companies/acme]]\ncontact: \"Jane Doe\"\n---\n\n# E\n";
    let meeting = "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-01\nexpense: \"2026-05 lunch\"\n---\n\n# M\n";

    let fx = Fixture::new();
    fx.write("records/expenses/e.md", expense);
    fx.write("records/meetings/m.md", meeting);

    let issues = fx.store_all();

    // expense.contact -> must point into records/contacts/
    let Some(contact_issue) = issues.iter().find(|i| {
        i.code == codes::SCHEMA_LINK_PREFIX_MISMATCH
            && i.file == *"records/expenses/e.md"
            && i.key.as_deref() == Some("contact")
    }) else {
        panic!("expense.contact plain string must be a prefix mismatch: {issues:#?}")
    };
    assert!(contact_issue
        .suggestion
        .as_deref()
        .unwrap()
        .contains("records/contacts/"));

    // meeting.expense -> must point into records/expenses/
    let Some(expense_issue) = issues.iter().find(|i| {
        i.code == codes::SCHEMA_LINK_PREFIX_MISMATCH
            && i.file == *"records/meetings/m.md"
            && i.key.as_deref() == Some("expense")
    }) else {
        panic!("meeting.expense plain string must be a prefix mismatch: {issues:#?}")
    };
    assert!(expense_issue
        .suggestion
        .as_deref()
        .unwrap()
        .contains("records/expenses/"));
}
/// The log deletes `records/contacts/sarah`, while the only wiki page links
/// to `records/contacts/sarah-chen`. The shared prefix must not drag that
/// page into the working set: no issue may reference it.
#[test]
fn incoming_linker_scan_does_not_prefix_match() {
    let linker_page = "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n";
    let log_body =
        "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah\nremoved\n";

    let fx = Fixture::new();
    fx.write("wiki/people/only-sarah-chen.md", linker_page);
    fx.write("log.md", log_body);

    let issues = validate_working_set(&fx.store(), None).unwrap();

    let page_pulled_in = issues
        .iter()
        .any(|issue| issue.file == *"wiki/people/only-sarah-chen.md");
    assert!(
        !page_pulled_in,
        "a prefix-sharing link must not pull a file into the working set: {issues:#?}"
    );
}
/// A catalog `index.md` that links to a path the log records as deleted must
/// be reported with `WIKI_LINK_BROKEN` by working-set validation.
#[test]
fn incoming_linker_scan_pulls_in_catalog_index_md() {
    let catalog_index =
        "---\ntype: index\n---\n\n- [[records/contacts/sarah-chen]] — Sarah Chen\n";
    let log_body =
        "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n";

    let fx = Fixture::new();
    fx.write("records/contacts/index.md", catalog_index);
    fx.write("log.md", log_body);

    let issues = validate_working_set(&fx.store(), None).unwrap();

    let index_flagged = issues.iter().any(|issue| {
        issue.file == *"records/contacts/index.md" && issue.code == codes::WIKI_LINK_BROKEN
    });
    assert!(
        index_flagged,
        "the catalog `index.md` linking to the deleted target must be pulled \
         into the working set and flagged WIKI_LINK_BROKEN (proves the scan \
         uses embedded-ripgrep `Store::find_links_to`, not the index-skipping \
         walk-and-read): {issues:#?}"
    );
}
/// The log deletes two different targets. One is referenced from a wiki page
/// body and the other from a meeting's `company` frontmatter field. Both
/// linking files must be reported with `WIKI_LINK_BROKEN` by working-set
/// validation.
#[test]
fn incoming_linker_scan_covers_the_whole_changed_set_in_one_pass() {
    let wiki_linker = "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n";
    let meeting_linker = "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Kickoff\n";
    let log_body = "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n\n## [2026-05-22 10:05] delete | records/companies/acme\nremoved\n";

    let fx = Fixture::new();
    fx.write("wiki/people/refers-sarah.md", wiki_linker);
    fx.write("records/meetings/2026/05/kickoff.md", meeting_linker);
    fx.write("log.md", log_body);

    let issues = validate_working_set(&fx.store(), None).unwrap();

    let first_linker_flagged = issues.iter().any(|issue| {
        issue.file == *"wiki/people/refers-sarah.md" && issue.code == codes::WIKI_LINK_BROKEN
    });
    assert!(
        first_linker_flagged,
        "linker to the FIRST deleted target must be pulled in and flagged: {issues:#?}"
    );

    let second_linker_flagged = issues.iter().any(|issue| {
        issue.file == *"records/meetings/2026/05/kickoff.md"
            && issue.code == codes::WIKI_LINK_BROKEN
    });
    assert!(
        second_linker_flagged,
        "linker to the SECOND deleted target (typed-field edge) must also be \
         pulled in and flagged — proves the scan covers the whole changed set, \
         not just one object: {issues:#?}"
    );
}
/// Two broken links listed as separate items of a frontmatter block sequence
/// must be reported with two distinct `line` values.
#[test]
fn frontmatter_block_sequence_links_each_get_their_own_line() {
    let meeting = "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nparticipants:\n - [[records/contacts/ghost1]]\n - [[records/contacts/ghost2]]\n---\n\n# M\n";
    let fx = Fixture::new();
    fx.write("records/meetings/m.md", meeting);

    let issues = fx.store_all();

    // Collecting into a set collapses issues that share a line value.
    let mut broken_lines: BTreeSet<Option<u32>> = BTreeSet::new();
    for issue in &issues {
        if issue.code == codes::WIKI_LINK_BROKEN {
            broken_lines.insert(issue.line);
        }
    }

    assert_eq!(
        broken_lines.len(),
        2,
        "two distinct broken-link lines: {issues:#?}"
    );
}
/// Cross-checks the `codes` module against `SPEC.md`.
///
/// The test re-reads this source file via `include_str!`, extracts the string
/// value of every `pub const` line between the `pub mod codes` header and the
/// first line that is exactly `}`, and then requires each value to appear in
/// `SPEC.md` as a table cell of the form ``| `CODE` |``. A floor of 36 parsed
/// constants guards against the line parser silently matching nothing after a
/// formatting change.
#[test]
fn every_code_constant_is_documented_in_spec() {
    let this_src = include_str!("validate.rs");

    let codes_in_module: Vec<String> = this_src
        .lines()
        // Everything before the module header is irrelevant.
        .skip_while(|line| !line.trim().starts_with("pub mod codes"))
        // Drop the header line itself.
        .skip(1)
        // The module ends at the first unindented closing brace.
        .take_while(|line| *line != "}")
        .filter_map(|line| {
            let rest = line.trim().strip_prefix("pub const ")?;
            // Expect `NAME: &str = "VALUE";` and keep only VALUE.
            let value = rest
                .split_once('=')
                .map(|(_, v)| v.trim())
                .and_then(|v| v.strip_prefix('"'))
                .and_then(|v| v.strip_suffix("\";"))
                .unwrap_or_else(|| panic!("unparseable code constant line: {line:?}"));
            Some(value.to_string())
        })
        .collect();

    assert!(
        codes_in_module.len() >= 36,
        "parsed only {} code constants from `mod codes`; the parser likely \
         broke against a source-format change",
        codes_in_module.len()
    );

    let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
    let spec_path = manifest_dir.join("../../SPEC.md");
    let spec = match fs::read_to_string(&spec_path) {
        Ok(text) => text,
        Err(e) => panic!("cannot read {}: {e}", spec_path.display()),
    };

    let missing: Vec<&String> = codes_in_module
        .iter()
        .filter(|code| {
            let table_cell = format!("| `{code}` |");
            !spec.contains(&table_cell)
        })
        .collect();
    assert!(
        missing.is_empty(),
        "validation codes emitted by the engine but absent from SPEC.md \
         § Validation (the declared complete vocabulary): {missing:?}"
    );
}
}