Skip to main content

dbmd_core/
stats.rs

1//! `stats` — store overview, **computed on demand** (a SWEEP, like `du` —
2//! never a maintained or precomputed cache).
3//!
4//! Serves both the human (how big is my brain, what's the shape) and the agent
5//! (orientation). Deliberately excludes graph density / degree / top-linked
6//! analytics — low agent value, and a human who wants graph metrics opens the
7//! store in Obsidian, so we never build the full graph just for stats.
8
9use std::collections::{BTreeMap, BTreeSet, HashSet};
10use std::path::{Path, PathBuf};
11
12use regex::Regex;
13
14use crate::store::{Layer, Store};
15
16/// A point-in-time overview of a store. Pure data; the CLI formats it to text
17/// or JSON.
18#[derive(Debug, Clone, Default, PartialEq)]
19pub struct Stats {
20    /// Total content-file count across all layers.
21    pub total_files: usize,
22    /// File count per layer.
23    pub files_per_layer: BTreeMap<Layer, usize>,
24    /// Total size on disk, in bytes.
25    pub total_size_bytes: u64,
26    /// Count per `type:` value (the type distribution).
27    pub type_distribution: BTreeMap<String, usize>,
28    /// Number of orphan files (no incoming and no outgoing wiki-links).
29    pub orphan_count: usize,
30    /// Number of broken wiki-links (target file doesn't exist).
31    pub broken_link_count: usize,
32    /// Top types by count, descending (ties broken by type name ascending).
33    pub top_types: Vec<(String, usize)>,
34    /// Recognized canonical types that appear in the store.
35    pub recognized_types_present: Vec<String>,
36    /// Custom (non-canonical) types that appear in the store.
37    pub custom_types_present: Vec<String>,
38}
39
/// Content `type:` values that SPEC.md § Recognized types treats as canonical.
///
/// The three meta types (`db-md`, `index`, `log`) are deliberately absent:
/// they label config and catalog files rather than content. Any `type:` value
/// outside this list counts as "custom" for schema coverage — it is still a
/// valid type, and the agent treats it as ambient context. The list lives in
/// this module, transcribed from the SPEC table, because the crate has no
/// shared canonical-type constant yet.
const RECOGNIZED_CONTENT_TYPES: &[&str] = &[
    "email",
    "transcript",
    "pdf-source",
    "contact",
    "company",
    "expense",
    "meeting",
    "decision",
    "invoice",
    "wiki-page",
];

/// Maximum number of entries kept in [`Stats::top_types`].
const TOP_TYPES_LIMIT: usize = 10;
62
63/// One content file discovered by the SWEEP, with everything `stats` needs:
64/// where it lives, how big it is, its declared `type`, and the wiki-link
65/// targets it emits (store-relative, `.md` stripped, short-form excluded).
66struct FileFacts {
67    /// Store-relative path *without* the `.md` extension — the node id used to
68    /// resolve wiki-links and detect orphans.
69    node_id: PathBuf,
70    /// The layer this file lives under.
71    layer: Layer,
72    /// File size on disk, in bytes.
73    size_bytes: u64,
74    /// The declared `type:`, if the frontmatter has one.
75    type_: Option<String>,
76    /// Every wiki-link target this file emits, store-relative with any trailing
77    /// `.md` stripped, in source order (not deduped, short-form included).
78    /// Resolved against the complete node set in a second pass.
79    raw_targets: Vec<PathBuf>,
80}
81
82impl FileFacts {
83    /// The subset of [`raw_targets`](FileFacts::raw_targets) that could resolve
84    /// to a store node: full store-relative paths. Short-form targets (no `/`)
85    /// are dropped — they're a `WIKI_LINK_SHORT_FORM` validation error, not a
86    /// graph edge, so stats neither counts them as broken nor lets them wire a
87    /// file out of orphan status.
88    fn resolvable_targets(&self) -> impl Iterator<Item = &PathBuf> {
89        self.raw_targets.iter().filter(|t| is_full_path(t))
90    }
91}
92
93/// **SWEEP.** Walk the store once and compute its [`Stats`]. Run occasionally
94/// (overview / orientation), never on the interactive loop.
95pub fn compute(store: &Store) -> crate::Result<Stats> {
96    let link_re = wiki_link_regex();
97
98    // First pass: walk every layer once, recording per-file facts and the set
99    // of node ids that exist on disk. Link resolution waits for the second
100    // pass, once every node's existence is known.
101    let mut existing_nodes: HashSet<PathBuf> = HashSet::new();
102    let mut facts: Vec<FileFacts> = Vec::new();
103
104    for layer in Layer::all() {
105        let layer_root = store.root.join(layer_dir_name(layer));
106        for abs in walk_layer_content_files(&layer_root)? {
107            let rel = abs.strip_prefix(&store.root).unwrap_or(&abs).to_path_buf();
108            let node_id = strip_md(&rel);
109            existing_nodes.insert(node_id.clone());
110
111            let size_bytes = std::fs::metadata(&abs).map(|m| m.len()).unwrap_or(0);
112            let text = std::fs::read_to_string(&abs).unwrap_or_default();
113            let type_ = parse_type(&text);
114            let raw_targets = extract_link_targets(&text, &link_re);
115
116            facts.push(FileFacts {
117                node_id,
118                layer,
119                size_bytes,
120                type_,
121                raw_targets,
122            });
123        }
124    }
125
126    // Second pass: classify every file's links against the complete node set,
127    // counting broken links (full-path targets with no file on disk) and
128    // recording which nodes receive an incoming edge. Short-form targets are a
129    // validation error elsewhere, not a stats edge, so they're skipped here:
130    // they neither wire a file in nor count as broken.
131    let mut stats = Stats::default();
132    let mut linked_to: HashSet<PathBuf> = HashSet::new();
133    for file in &facts {
134        for target in file.resolvable_targets() {
135            if existing_nodes.contains(target) {
136                linked_to.insert(target.clone());
137            } else {
138                // Broken links count occurrences, not distinct targets.
139                stats.broken_link_count += 1;
140            }
141        }
142    }
143
144    // Third pass: roll the per-file facts up into the aggregate Stats. A file is
145    // an orphan iff it has neither a resolvable outgoing edge nor an incoming one.
146    for file in &facts {
147        stats.total_files += 1;
148        *stats.files_per_layer.entry(file.layer).or_insert(0) += 1;
149        stats.total_size_bytes += file.size_bytes;
150
151        if let Some(t) = &file.type_ {
152            *stats.type_distribution.entry(t.clone()).or_insert(0) += 1;
153        }
154
155        let has_outgoing = file
156            .resolvable_targets()
157            .any(|t| existing_nodes.contains(t));
158        let has_incoming = linked_to.contains(&file.node_id);
159        if !has_outgoing && !has_incoming {
160            stats.orphan_count += 1;
161        }
162    }
163
164    stats.top_types = top_types(&stats.type_distribution, TOP_TYPES_LIMIT);
165    let (recognized, custom) = split_schema_coverage(&stats.type_distribution);
166    stats.recognized_types_present = recognized;
167    stats.custom_types_present = custom;
168
169    Ok(stats)
170}
171
172/// On-disk folder name for a layer. Local copy so `stats` doesn't couple to
173/// [`Layer::dir_name`].
174fn layer_dir_name(layer: Layer) -> &'static str {
175    match layer {
176        Layer::Sources => "sources",
177        Layer::Records => "records",
178        Layer::Wiki => "wiki",
179    }
180}
181
182/// Recursively collect the `.md` **content** files under one layer root,
183/// skipping hidden entries (`.git`, dotfiles), the `log/` archive tree, and the
184/// `index.md` catalog meta files. Returns absolute paths. A missing layer root
185/// yields an empty list (a store need not have all three layers).
186fn walk_layer_content_files(layer_root: &Path) -> crate::Result<Vec<PathBuf>> {
187    let mut out = Vec::new();
188    if !layer_root.is_dir() {
189        return Ok(out);
190    }
191    let walker = walkdir::WalkDir::new(layer_root)
192        .into_iter()
193        .filter_entry(|e| {
194            // Skip hidden dirs/files and any `log` directory wholesale.
195            let name = e.file_name().to_string_lossy();
196            if name.starts_with('.') {
197                return false;
198            }
199            if e.file_type().is_dir() && name == "log" {
200                return false;
201            }
202            true
203        });
204    for entry in walker {
205        let entry = entry.map_err(|e| {
206            crate::Error::Io(
207                e.into_io_error()
208                    .unwrap_or_else(|| std::io::Error::other("walk error")),
209            )
210        })?;
211        if !entry.file_type().is_file() {
212            continue;
213        }
214        let path = entry.path();
215        let name = entry.file_name().to_string_lossy();
216        // Content files are `.md`; `index.md` is a meta catalog file, not
217        // content, and `index.jsonl` / other sidecars aren't `.md` at all.
218        if !name.ends_with(".md") || name == "index.md" {
219            continue;
220        }
221        out.push(path.to_path_buf());
222    }
223    out.sort();
224    Ok(out)
225}
226
227/// The wiki-link matcher: `[[target]]` or `[[target|display]]`. Captures the
228/// target (group 1), excluding `]` and `|`. Anchored on the literal brackets so
229/// it ignores `[markdown](links)`.
230fn wiki_link_regex() -> Regex {
231    // `[^\[\]|]+` keeps the target free of brackets and the display pipe.
232    Regex::new(r"\[\[([^\[\]|]+)(?:\|[^\]]*)?\]\]").expect("static wiki-link regex is valid")
233}
234
235/// Every wiki-link target in a file's full text (frontmatter + body), trimmed,
236/// with any trailing `.md` removed. Order-preserving; not deduped.
237fn extract_link_targets(text: &str, re: &Regex) -> Vec<PathBuf> {
238    re.captures_iter(text)
239        .filter_map(|c| c.get(1))
240        .map(|m| {
241            let raw = m.as_str().trim();
242            strip_md(Path::new(raw))
243        })
244        .collect()
245}
246
/// Drop a trailing `.md` from a path, leaving everything else intact.
///
/// Valid UTF-8 paths are treated as text: the result is the path with a final
/// `.md` removed, or an unchanged copy when there is no such suffix.
///
/// Paths that are not valid UTF-8 (possible for on-disk names on Unix) are
/// never passed through a lossy conversion, which would replace the invalid
/// bytes with U+FFFD and so change the name. For those the `md` extension is
/// removed with [`Path::with_extension`], which keeps every other byte as is.
fn strip_md(path: &Path) -> PathBuf {
    match path.to_str() {
        Some(text) => match text.strip_suffix(".md") {
            Some(stem) => PathBuf::from(stem),
            None => path.to_path_buf(),
        },
        // Non-UTF-8 path. Checking the raw encoded bytes keeps the "ends with
        // `.md`" rule identical to the text branch (a trailing separator
        // still means "no suffix") without decoding anything.
        None if path.as_os_str().as_encoded_bytes().ends_with(b".md") => path.with_extension(""),
        None => path.to_path_buf(),
    }
}
255
/// Whether a wiki-link target is written as a full store-relative path, which
/// here means it has more than one path component. A short-form target such
/// as `sarah-chen` is a single component and yields `false`. Doctrine: only
/// full paths resolve to a node.
fn is_full_path(target: &Path) -> bool {
    // A second component exists exactly when there is more than one.
    target.components().nth(1).is_some()
}
262
263/// Read the `type:` value from a file's leading YAML frontmatter block, if the
264/// file has one. Returns `None` when there's no frontmatter or no `type` key.
265/// Self-contained (does not route through the crate's parser): split on the
266/// `---` fences, parse the block as a YAML mapping, read `type` as a string.
267fn parse_type(text: &str) -> Option<String> {
268    let yaml = frontmatter_block(text)?;
269    let value: serde_yml::Value = serde_yml::from_str(&yaml).ok()?;
270    let mapping = value.as_mapping()?;
271    let type_val = mapping.get(serde_yml::Value::String("type".to_string()))?;
272    let s = type_val.as_str()?.trim();
273    if s.is_empty() {
274        None
275    } else {
276        Some(s.to_string())
277    }
278}
279
/// Return the raw YAML that sits between a leading `---` fence and the next
/// `---` fence, with each line terminated by `\n`.
///
/// The opening fence has to be the first line of the file (a leading BOM is
/// ignored), per the universal frontmatter contract that frontmatter comes
/// first. Trailing whitespace on a fence line is tolerated. `None` is
/// returned when the first line is not a fence or the block is never closed.
fn frontmatter_block(text: &str) -> Option<String> {
    fn is_fence(line: &str) -> bool {
        line.trim_end() == "---"
    }

    // A BOM in front of the opening fence does not disqualify it.
    let text = text.strip_prefix('\u{feff}').unwrap_or(text);
    let mut lines = text.lines();
    if !lines.next().is_some_and(is_fence) {
        return None;
    }

    // Accumulate lines until the closing fence. `any` stops at the first
    // fence and reports whether one was found at all.
    let mut yaml = String::new();
    let closed = lines.any(|line| {
        if is_fence(line) {
            return true;
        }
        yaml.push_str(line);
        yaml.push('\n');
        false
    });

    // Without a closing fence this is not a valid frontmatter block.
    closed.then_some(yaml)
}
302
/// Rank a type distribution and return at most `limit` entries: highest count
/// first, with equal counts ordered by type name ascending.
fn top_types(dist: &BTreeMap<String, usize>, limit: usize) -> Vec<(String, usize)> {
    let mut ranked: Vec<(&str, usize)> = dist
        .iter()
        .map(|(name, count)| (name.as_str(), *count))
        .collect();
    // Count descending, then name ascending. Map keys are unique, so this
    // ordering is total.
    ranked.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));
    ranked
        .into_iter()
        .take(limit)
        .map(|(name, count)| (name.to_string(), count))
        .collect()
}
313
314/// Partition the present types into (recognized canonical content types,
315/// custom types). Both lists are sorted ascending and deduped.
316fn split_schema_coverage(dist: &BTreeMap<String, usize>) -> (Vec<String>, Vec<String>) {
317    let canonical: BTreeSet<&str> = RECOGNIZED_CONTENT_TYPES.iter().copied().collect();
318    let mut recognized = Vec::new();
319    let mut custom = Vec::new();
320    // BTreeMap keys are already sorted ascending.
321    for type_ in dist.keys() {
322        if canonical.contains(type_.as_str()) {
323            recognized.push(type_.clone());
324        } else {
325            custom.push(type_.clone());
326        }
327    }
328    (recognized, custom)
329}
330
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::Config;
    use std::fs;
    use tempfile::TempDir;

    /// Create a `Store` over a brand-new tempdir that contains only an empty
    /// `DB.md` marker. The struct is built by hand instead of going through
    /// `Store::open`, since `stats::compute` reads nothing but `store.root`.
    /// The `TempDir` is returned so the caller keeps the directory alive.
    fn temp_store() -> (TempDir, Store) {
        let tmp = TempDir::new().expect("tempdir");
        let root = tmp.path().to_path_buf();
        fs::write(root.join("DB.md"), "---\ntype: db-md\n---\n").expect("write DB.md");
        let store = Store {
            root,
            config: Config::default(),
        };
        (tmp, store)
    }

    /// Write `contents` to a store-relative path, creating missing parents.
    fn write_rel(store: &Store, rel: &str, contents: &str) {
        let target = store.root.join(rel);
        if let Some(dir) = target.parent() {
            fs::create_dir_all(dir).expect("mkdir parents");
        }
        fs::write(target, contents).expect("write content file");
    }

    /// Smallest useful content file: frontmatter carrying the given type and
    /// summary, and a body without links.
    fn doc(type_: &str, summary: &str) -> String {
        format!("---\ntype: {type_}\nsummary: \"{summary}\"\n---\n\nbody\n")
    }

    /// A content file of the given type (summary `a`) whose body is `body`.
    /// Used by the link tests, which only vary the body text.
    fn page_with_body(type_: &str, body: &str) -> String {
        format!("---\ntype: {type_}\nsummary: a\n---\n\n{body}\n")
    }

    /// Seed a fresh store with `(store-relative path, contents)` pairs and
    /// return the stats computed over it.
    fn stats_of(files: &[(&str, String)]) -> Stats {
        let (_tmp, store) = temp_store();
        for (rel, contents) in files {
            write_rel(&store, rel, contents);
        }
        compute(&store).expect("compute")
    }

    #[test]
    fn empty_store_is_all_zeros() {
        // Every counter is zero and every collection is empty, which is
        // exactly the `Default` value.
        assert_eq!(stats_of(&[]), Stats::default());
    }

    #[test]
    fn counts_files_per_layer_and_total() {
        let stats = stats_of(&[
            ("sources/emails/a.md", doc("email", "a")),
            ("sources/emails/b.md", doc("email", "b")),
            ("records/contacts/c.md", doc("contact", "c")),
            ("wiki/people/p.md", doc("wiki-page", "p")),
        ]);

        assert_eq!(stats.total_files, 4);
        assert_eq!(
            stats.files_per_layer,
            BTreeMap::from([(Layer::Sources, 2), (Layer::Records, 1), (Layer::Wiki, 1)])
        );
    }

    #[test]
    fn ignores_meta_files_and_non_md_and_dotdirs_and_log() {
        let stats = stats_of(&[
            // The single real content file.
            ("records/contacts/real.md", doc("contact", "real")),
            // Meta and non-content files that must NOT be counted.
            (
                "records/contacts/index.md",
                "---\ntype: index\nscope: type-folder\n---\n".to_string(),
            ),
            ("records/contacts/index.jsonl", "{}\n".to_string()),
            ("records/notes.txt", "not markdown\n".to_string()),
            // A `log/` archive tree under a layer is skipped wholesale.
            ("sources/log/2026-04.md", doc("email", "archived")),
            // Anything inside a hidden directory is skipped.
            ("wiki/.obsidian/cache.md", doc("wiki-page", "hidden")),
        ]);

        assert_eq!(
            stats.total_files, 1,
            "only the one real content file counts"
        );
        // Only the Records layer has an entry; Sources and Wiki are absent.
        assert_eq!(
            stats.files_per_layer,
            BTreeMap::from([(Layer::Records, 1)])
        );
    }

    #[test]
    fn total_size_is_sum_of_content_file_bytes() {
        let email = doc("email", "a");
        let contact =
            "---\ntype: contact\nsummary: x\n---\n\nlonger body text here\n".to_string();
        let expected = email.len() as u64 + contact.len() as u64;

        let stats = stats_of(&[
            ("sources/emails/a.md", email),
            ("records/contacts/b.md", contact),
            // Bytes of a skipped file must not be included.
            (
                "records/contacts/index.md",
                "---\ntype: index\n---\nbig meta file padding padding\n".to_string(),
            ),
        ]);

        assert_eq!(stats.total_size_bytes, expected);
    }

    #[test]
    fn type_distribution_counts_each_type_value() {
        let stats = stats_of(&[
            ("sources/emails/a.md", doc("email", "a")),
            ("sources/emails/b.md", doc("email", "b")),
            ("sources/emails/c.md", doc("email", "c")),
            ("records/contacts/d.md", doc("contact", "d")),
            ("records/proposals/e.md", doc("proposal", "e")),
        ]);

        assert_eq!(
            stats.type_distribution,
            BTreeMap::from([
                ("contact".to_string(), 1),
                ("email".to_string(), 3),
                ("proposal".to_string(), 1),
            ])
        );
    }

    #[test]
    fn file_without_type_is_counted_in_totals_but_not_distribution() {
        let stats = stats_of(&[
            // Frontmatter present, but it has no `type:` key.
            (
                "wiki/themes/x.md",
                "---\nsummary: no type here\n---\n\nbody\n".to_string(),
            ),
            // No frontmatter at all.
            (
                "wiki/themes/y.md",
                "just a body, no frontmatter\n".to_string(),
            ),
        ]);

        assert_eq!(
            stats.total_files, 2,
            "untyped files still count toward totals"
        );
        assert_eq!(stats.files_per_layer.get(&Layer::Wiki), Some(&2));
        assert!(
            stats.type_distribution.is_empty(),
            "no type key => no distribution entry, not an empty-string bucket"
        );
    }

    #[test]
    fn top_types_orders_by_count_desc_then_name_asc() {
        // contact x3 and email x3 tie; decision x1 trails.
        let stats = stats_of(&[
            ("records/contacts/c1.md", doc("contact", "1")),
            ("records/contacts/c2.md", doc("contact", "2")),
            ("records/contacts/c3.md", doc("contact", "3")),
            ("sources/emails/e1.md", doc("email", "1")),
            ("sources/emails/e2.md", doc("email", "2")),
            ("sources/emails/e3.md", doc("email", "3")),
            ("records/decisions/d1.md", doc("decision", "1")),
        ]);

        assert_eq!(
            stats.top_types,
            vec![
                ("contact".to_string(), 3),
                ("email".to_string(), 3),
                ("decision".to_string(), 1),
            ],
            "ties (contact, email both 3) break by name ascending; decision trails"
        );
    }

    #[test]
    fn top_types_is_capped_at_ten() {
        let (_tmp, store) = temp_store();
        // Twelve distinct custom types with one file each.
        for n in 0..12 {
            let type_name = format!("type{n:02}");
            let rel = format!("records/{type_name}/f.md");
            write_rel(&store, &rel, &doc(&type_name, "x"));
        }

        let stats = compute(&store).expect("compute");
        assert_eq!(stats.top_types.len(), 10, "top_types caps at 10");
        assert_eq!(
            stats.type_distribution.len(),
            12,
            "distribution keeps all types"
        );
    }

    #[test]
    fn schema_coverage_splits_recognized_from_custom() {
        let stats = stats_of(&[
            // Recognized.
            ("records/contacts/c.md", doc("contact", "c")),
            ("sources/emails/e.md", doc("email", "e")),
            ("wiki/people/p.md", doc("wiki-page", "p")),
            // Custom.
            ("records/proposals/x.md", doc("proposal", "x")),
            ("records/widgets/w.md", doc("widget", "w")),
        ]);

        assert_eq!(
            stats.recognized_types_present,
            ["contact", "email", "wiki-page"],
            "recognized canonical content types, sorted ascending"
        );
        assert_eq!(
            stats.custom_types_present,
            ["proposal", "widget"],
            "non-canonical types land in custom, sorted ascending"
        );
    }

    #[test]
    fn meta_types_are_not_recognized_content_types() {
        // `index`, `log` and `db-md` are meta types, not content types. A
        // stray content file that declares one is unusual, but stats must not
        // lie about it: for schema coverage the type is custom.
        let stats = stats_of(&[("wiki/synthesis/weird.md", doc("log", "weird"))]);

        assert!(
            stats.recognized_types_present.is_empty(),
            "`log` is a meta type, not a recognized content type"
        );
        assert_eq!(stats.custom_types_present, vec!["log".to_string()]);
    }

    #[test]
    fn orphans_are_files_with_no_incoming_and_no_outgoing_links() {
        // a -> b gives a an outgoing edge and b an incoming one. c touches
        // nothing, so c alone is an orphan.
        let stats = stats_of(&[
            (
                "records/contacts/a.md",
                page_with_body("contact", "See [[records/contacts/b]]."),
            ),
            ("records/contacts/b.md", doc("contact", "b")),
            ("records/contacts/c.md", doc("contact", "c")),
        ]);

        assert_eq!(stats.orphan_count, 1, "only c is an orphan");
    }

    #[test]
    fn a_file_with_only_an_incoming_link_is_not_an_orphan() {
        // b links to nothing, but a links to b, so b is wired in. a has an
        // outgoing link of its own. Nobody is orphaned.
        let stats = stats_of(&[
            (
                "wiki/people/a.md",
                page_with_body("wiki-page", "[[wiki/people/b]]"),
            ),
            ("wiki/people/b.md", doc("wiki-page", "b")),
        ]);

        assert_eq!(stats.orphan_count, 0);
    }

    #[test]
    fn frontmatter_wiki_links_count_as_edges_for_orphans() {
        // Here the link sits in a frontmatter field rather than the body. It
        // still has to wire `contact` -> `company`, leaving no orphan.
        let stats = stats_of(&[
            (
                "records/contacts/sarah.md",
                "---\ntype: contact\nsummary: s\ncompany: [[records/companies/acme]]\n---\n\nbody\n"
                    .to_string(),
            ),
            ("records/companies/acme.md", doc("company", "acme")),
        ]);

        assert_eq!(
            stats.orphan_count, 0,
            "a frontmatter wiki-link is a real edge; neither endpoint is orphaned"
        );
    }

    #[test]
    fn broken_links_count_targets_that_do_not_exist() {
        // One link hits an existing file, the other a missing one.
        let stats = stats_of(&[
            (
                "wiki/people/a.md",
                page_with_body(
                    "wiki-page",
                    "[[wiki/people/b]] and [[records/contacts/ghost]]",
                ),
            ),
            ("wiki/people/b.md", doc("wiki-page", "b")),
        ]);

        assert_eq!(
            stats.broken_link_count, 1,
            "only the ghost target is broken"
        );
    }

    #[test]
    fn broken_link_resolves_with_md_extension_stripped() {
        // A target written WITH `.md` must resolve to the real file (the
        // parser accepts `.md`; validate only warns), so it is not broken.
        let stats = stats_of(&[
            (
                "wiki/people/a.md",
                page_with_body("wiki-page", "[[wiki/people/b.md]]"),
            ),
            ("wiki/people/b.md", doc("wiki-page", "b")),
        ]);

        assert_eq!(
            stats.broken_link_count, 0,
            "a `.md`-suffixed target resolves to the same node and is not broken"
        );
    }

    #[test]
    fn short_form_links_are_not_broken_and_do_not_wire_the_graph() {
        // `[[b]]` has no `/`, i.e. it is short-form. That is a validation
        // error elsewhere; for stats it is neither a broken link (it never
        // resolves to a node) nor an edge.
        let stats = stats_of(&[
            ("records/contacts/a.md", page_with_body("contact", "[[b]]")),
            ("records/contacts/b.md", doc("contact", "b")),
        ]);

        assert_eq!(
            stats.broken_link_count, 0,
            "short-form links are not counted as broken by stats"
        );
        // a's only link is short-form, so a has no edge; b has no links and
        // no real incoming edge. Both are orphans.
        assert_eq!(stats.orphan_count, 2);
    }

    #[test]
    fn display_alias_links_resolve_to_the_target_not_the_alias() {
        // `[[wiki/people/b|Bob]]` points at b and merely displays "Bob". With
        // the alias stripped the edge lands on b, which exists: nothing is
        // broken and b is not an orphan.
        let stats = stats_of(&[
            (
                "wiki/people/a.md",
                page_with_body("wiki-page", "met [[wiki/people/b|Bob]] today"),
            ),
            ("wiki/people/b.md", doc("wiki-page", "b")),
        ]);

        assert_eq!(
            stats.broken_link_count, 0,
            "alias target resolves and exists"
        );
        assert_eq!(stats.orphan_count, 0, "a links out, b is linked to");
    }

    #[test]
    fn duplicate_links_in_one_file_count_broken_per_occurrence() {
        // Linking the same missing target twice yields two broken links.
        let stats = stats_of(&[(
            "wiki/people/a.md",
            page_with_body(
                "wiki-page",
                "[[records/contacts/ghost]] [[records/contacts/ghost]]",
            ),
        )]);

        assert_eq!(
            stats.broken_link_count, 2,
            "broken links count occurrences, not distinct targets"
        );
    }

    #[test]
    fn markdown_links_are_not_treated_as_wiki_links() {
        // An ordinary markdown link to an external URL is not a wiki edge, so
        // the file remains an orphan and nothing counts as broken.
        let stats = stats_of(&[(
            "wiki/people/a.md",
            page_with_body("wiki-page", "See [Acme](https://acme.io/path)."),
        )]);

        assert_eq!(
            stats.broken_link_count, 0,
            "markdown links aren't graph edges"
        );
        assert_eq!(
            stats.orphan_count, 1,
            "the file has no wiki-links => orphan"
        );
    }

    #[test]
    fn a_link_to_an_existing_file_in_another_layer_resolves() {
        // A wiki page pointing at a source file: full-path links across
        // layers resolve just like links within one layer.
        let stats = stats_of(&[
            (
                "wiki/people/a.md",
                page_with_body("wiki-page", "from [[sources/emails/2026/05/m]]"),
            ),
            ("sources/emails/2026/05/m.md", doc("email", "m")),
        ]);

        assert_eq!(stats.broken_link_count, 0);
        assert_eq!(stats.orphan_count, 0, "both endpoints are wired");
    }
}
735}