Skip to main content

dbmd_core/
stats.rs

1//! `stats` — store overview, **computed on demand** (a SWEEP, like `du` —
2//! never a maintained or precomputed cache).
3//!
4//! Serves both the human (how big is my brain, what's the shape) and the agent
5//! (orientation). Deliberately excludes graph density / degree / top-linked
6//! analytics — low agent value, and a human who wants graph metrics opens the
7//! store in Obsidian, so we never build the full graph just for stats.
8
9use std::collections::{BTreeMap, HashSet};
10use std::path::{Path, PathBuf};
11
12use regex::Regex;
13
14use crate::store::{Layer, Store};
15
/// A point-in-time overview of a store. Pure data; the CLI formats it to text
/// or JSON.
///
/// Produced by [`compute`]. The `Default` value is the all-zero / all-empty
/// overview, which is also what an empty store yields.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Stats {
    /// Total content-file count across all layers.
    pub total_files: usize,
    /// File count per layer. A layer with no content files has no entry at
    /// all (rather than an explicit zero).
    pub files_per_layer: BTreeMap<Layer, usize>,
    /// Total size on disk, in bytes.
    pub total_size_bytes: u64,
    /// Count per `type:` value (the type distribution). Files without a
    /// non-empty string `type:` are not represented here.
    pub type_distribution: BTreeMap<String, usize>,
    /// Number of orphan files: no incoming and no outgoing wiki-link that
    /// resolves to another existing file. Self-links and short-form links are
    /// not edges for this purpose.
    pub orphan_count: usize,
    /// Number of broken wiki-links (full-path target whose file doesn't
    /// exist), counted per occurrence rather than per distinct target.
    pub broken_link_count: usize,
    /// Top types by count, descending (ties broken by type name ascending),
    /// capped at `TOP_TYPES_LIMIT` entries.
    pub top_types: Vec<(String, usize)>,
}
35
/// How many entries [`Stats::top_types`] holds at most. The full, uncapped
/// counts stay available in [`Stats::type_distribution`].
const TOP_TYPES_LIMIT: usize = 10;
38
/// One content file discovered by the SWEEP, with everything `stats` needs:
/// where it lives, how big it is, its declared `type`, and the wiki-link
/// targets it emits (store-relative, `.md` stripped). The stored targets are
/// unfiltered — short-form targets are still present here and are only
/// dropped by [`FileFacts::resolvable_targets`].
struct FileFacts {
    /// Store-relative path *without* the `.md` extension — the node id used to
    /// resolve wiki-links and detect orphans.
    node_id: PathBuf,
    /// The layer this file lives under.
    layer: Layer,
    /// File size on disk, in bytes.
    size_bytes: u64,
    /// The declared `type:`, if the frontmatter has one.
    type_: Option<String>,
    /// Every wiki-link target this file emits, store-relative with any trailing
    /// `.md` stripped, in source order (not deduped, short-form included).
    /// Resolved against the complete node set in a second pass.
    raw_targets: Vec<PathBuf>,
}
57
58impl FileFacts {
59    /// The subset of [`raw_targets`](FileFacts::raw_targets) that could resolve
60    /// to a store node: full store-relative paths. Short-form targets (no `/`)
61    /// are dropped — they're a `WIKI_LINK_SHORT_FORM` validation error, not a
62    /// graph edge, so stats neither counts them as broken nor lets them wire a
63    /// file out of orphan status.
64    fn resolvable_targets(&self) -> impl Iterator<Item = &PathBuf> {
65        self.raw_targets.iter().filter(|t| is_full_path(t))
66    }
67}
68
69/// **SWEEP.** Walk the store once and compute its [`Stats`]. Run occasionally
70/// (overview / orientation), never on the interactive loop.
71pub fn compute(store: &Store) -> crate::Result<Stats> {
72    let link_re = wiki_link_regex();
73
74    // First pass: walk every layer once, recording per-file facts and the set
75    // of node ids that exist on disk. Link resolution waits for the second
76    // pass, once every node's existence is known.
77    let mut existing_nodes: HashSet<PathBuf> = HashSet::new();
78    let mut facts: Vec<FileFacts> = Vec::new();
79
80    for layer in Layer::all() {
81        let layer_root = store.root.join(layer_dir_name(layer));
82        for abs in walk_layer_content_files(&layer_root)? {
83            let rel = abs.strip_prefix(&store.root).unwrap_or(&abs).to_path_buf();
84            let node_id = strip_md(&rel);
85            existing_nodes.insert(node_id.clone());
86
87            let size_bytes = std::fs::metadata(&abs).map(|m| m.len()).unwrap_or(0);
88            let text = std::fs::read_to_string(&abs).unwrap_or_default();
89            let type_ = parse_type(&text);
90            let raw_targets = extract_link_targets(&text, &link_re);
91
92            facts.push(FileFacts {
93                node_id,
94                layer,
95                size_bytes,
96                type_,
97                raw_targets,
98            });
99        }
100    }
101
102    // Second pass: classify every file's links against the complete node set,
103    // counting broken links (full-path targets with no file on disk) and
104    // recording which nodes receive an incoming edge. Short-form targets are a
105    // validation error elsewhere, not a stats edge, so they're skipped here:
106    // they neither wire a file in nor count as broken.
107    let mut stats = Stats::default();
108    let mut linked_to: HashSet<PathBuf> = HashSet::new();
109    for file in &facts {
110        for target in file.resolvable_targets() {
111            // A self-link is not a graph edge — skip it (matches `graph::orphans`,
112            // so the two surfaces agree on whether a self-only-linking file is an
113            // orphan). It is neither incoming nor broken.
114            if target == &file.node_id {
115                continue;
116            }
117            if existing_nodes.contains(target) {
118                linked_to.insert(target.clone());
119            } else {
120                // Broken links count occurrences, not distinct targets.
121                stats.broken_link_count += 1;
122            }
123        }
124    }
125
126    // Third pass: roll the per-file facts up into the aggregate Stats. A file is
127    // an orphan iff it has neither a resolvable outgoing edge nor an incoming one.
128    for file in &facts {
129        stats.total_files += 1;
130        *stats.files_per_layer.entry(file.layer).or_insert(0) += 1;
131        stats.total_size_bytes += file.size_bytes;
132
133        if let Some(t) = &file.type_ {
134            *stats.type_distribution.entry(t.clone()).or_insert(0) += 1;
135        }
136
137        let has_outgoing = file
138            .resolvable_targets()
139            .any(|t| t != &file.node_id && existing_nodes.contains(t));
140        let has_incoming = linked_to.contains(&file.node_id);
141        if !has_outgoing && !has_incoming {
142            stats.orphan_count += 1;
143        }
144    }
145
146    stats.top_types = top_types(&stats.type_distribution, TOP_TYPES_LIMIT);
147
148    Ok(stats)
149}
150
/// On-disk folder name for a layer, i.e. the directory directly under the
/// store root that holds that layer's files. Local copy so `stats` doesn't
/// couple to [`Layer::dir_name`].
///
/// The match has no catch-all arm, so adding a `Layer` variant is a compile
/// error here until its folder name is supplied.
fn layer_dir_name(layer: Layer) -> &'static str {
    match layer {
        Layer::Sources => "sources",
        Layer::Records => "records",
        Layer::Wiki => "wiki",
    }
}
160
161/// Recursively collect the `.md` **content** files under one layer root,
162/// skipping hidden entries (`.git`, dotfiles), the `log/` archive tree, and the
163/// `index.md` catalog meta files. Returns absolute paths. A missing layer root
164/// yields an empty list (a store need not have all three layers).
165fn walk_layer_content_files(layer_root: &Path) -> crate::Result<Vec<PathBuf>> {
166    let mut out = Vec::new();
167    if !layer_root.is_dir() {
168        return Ok(out);
169    }
170    let walker = walkdir::WalkDir::new(layer_root)
171        .into_iter()
172        .filter_entry(|e| {
173            // Skip hidden dirs/files and any `log` directory wholesale.
174            let name = e.file_name().to_string_lossy();
175            if name.starts_with('.') {
176                return false;
177            }
178            if e.file_type().is_dir() && name == "log" {
179                return false;
180            }
181            true
182        });
183    for entry in walker {
184        let entry = entry.map_err(|e| {
185            crate::Error::Io(
186                e.into_io_error()
187                    .unwrap_or_else(|| std::io::Error::other("walk error")),
188            )
189        })?;
190        if !entry.file_type().is_file() {
191            continue;
192        }
193        let path = entry.path();
194        let name = entry.file_name().to_string_lossy();
195        // Content files are `.md`; `index.md` is a meta catalog file, not
196        // content, and `index.jsonl` / other sidecars aren't `.md` at all.
197        if !name.ends_with(".md") || name == "index.md" {
198            continue;
199        }
200        out.push(path.to_path_buf());
201    }
202    out.sort();
203    Ok(out)
204}
205
206/// The wiki-link matcher: `[[target]]` or `[[target|display]]`. Captures the
207/// target (group 1), excluding `]` and `|`. Anchored on the literal brackets so
208/// it ignores `[markdown](links)`.
209fn wiki_link_regex() -> Regex {
210    // `[^\[\]|]+` keeps the target free of brackets and the display pipe.
211    Regex::new(r"\[\[([^\[\]|]+)(?:\|[^\]]*)?\]\]").expect("static wiki-link regex is valid")
212}
213
214/// Every wiki-link target in a file's full text (frontmatter + body), trimmed,
215/// with any trailing `.md` removed. Order-preserving; not deduped.
216///
217/// Fenced code blocks (```/~~~) are skipped, mirroring
218/// `validate::extract_wiki_links`: a `[[...]]` that lives only inside a code
219/// fence is illustrative syntax in a doc, not a graph edge, so stats must not
220/// count it as broken or use it to un-orphan a file. (Frontmatter never carries
221/// code fences, so this scan stays line-based over the whole file without
222/// dropping the frontmatter links stats deliberately counts as edges.)
223fn extract_link_targets(text: &str, re: &Regex) -> Vec<PathBuf> {
224    let mut out = Vec::new();
225    let mut in_fence = false;
226    for line in text.lines() {
227        let trimmed = line.trim_start();
228        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
229            in_fence = !in_fence;
230            continue;
231        }
232        if in_fence {
233            continue;
234        }
235        for cap in re.captures_iter(line) {
236            if let Some(m) = cap.get(1) {
237                let raw = m.as_str().trim();
238                out.push(strip_md(Path::new(raw)));
239            }
240        }
241    }
242    out
243}
244
/// Return `path` without a trailing `.md`, or an unchanged copy when it does
/// not end in `.md`.
///
/// The suffix test is textual (on the lossy string form of the path), so only
/// a literal `.md` at the very end is removed — exactly one occurrence, and
/// nothing in the middle of the path.
fn strip_md(path: &Path) -> PathBuf {
    let rendered = path.to_string_lossy();
    rendered
        .strip_suffix(".md")
        .map_or_else(|| path.to_path_buf(), PathBuf::from)
}
253
/// Decide whether a wiki-link target is a full store-relative path.
///
/// Two things must hold: the first path segment is one of the recognized
/// layer folders (`sources`, `records`, `wiki`), and at least one more
/// segment follows it. So short-form targets such as `sarah-chen` are
/// rejected, and so are multi-segment targets without a layer prefix such as
/// `contacts/sarah-chen`. Doctrine: only true store-relative paths resolve to
/// a node.
///
/// This mirrors `validate::is_full_store_path` so that
/// `stats.broken_link_count` agrees with `validate`'s `WIKI_LINK_BROKEN`
/// total: `[[contacts/sarah]]` is a short-form error in `validate` (never
/// broken) and must be excluded here too rather than counted as a broken edge.
fn is_full_path(target: &Path) -> bool {
    use std::path::Component;

    const LAYER_DIRS: [&str; 3] = ["sources", "records", "wiki"];

    let mut segments = target.components();
    // A leading root, `.`/`..`, or an empty path can never be a layer folder.
    let Some(Component::Normal(head)) = segments.next() else {
        return false;
    };
    let starts_in_layer = head
        .to_str()
        .is_some_and(|name| LAYER_DIRS.contains(&name));
    starts_in_layer && segments.next().is_some()
}
274
275/// Read the `type:` value from a file's leading YAML frontmatter block, if the
276/// file has one. Returns `None` when there's no frontmatter or no `type` key.
277/// Self-contained (does not route through the crate's parser): split on the
278/// `---` fences, parse the block as a YAML mapping, read `type` as a string.
279fn parse_type(text: &str) -> Option<String> {
280    let yaml = frontmatter_block(text)?;
281    let value: serde_norway::Value = serde_norway::from_str(&yaml).ok()?;
282    let mapping = value.as_mapping()?;
283    let type_val = mapping.get(serde_norway::Value::String("type".to_string()))?;
284    let s = type_val.as_str()?.trim();
285    if s.is_empty() {
286        None
287    } else {
288        Some(s.to_string())
289    }
290}
291
/// Return the raw YAML that sits between a leading `---` fence and the next
/// `---` fence, with every line terminated by `\n`.
///
/// The opening fence has to be the very first line of the file (the universal
/// frontmatter contract: frontmatter is the first thing in the file); a
/// leading byte-order mark is tolerated and trailing whitespace on a fence
/// line is ignored. Returns `None` when the file does not open with a fence
/// or when the fence is never closed.
fn frontmatter_block(text: &str) -> Option<String> {
    let is_fence = |line: &str| line.trim_end() == "---";

    // Look past a BOM, but still insist on `---` as the first line.
    let unmarked = text.strip_prefix('\u{feff}').unwrap_or(text);
    let mut remaining = unmarked.lines();
    if !is_fence(remaining.next()?) {
        return None;
    }

    let mut yaml = String::new();
    loop {
        // Running out of lines means the block was never closed: not a valid
        // frontmatter block, so bail out with `None`.
        let line = remaining.next()?;
        if is_fence(line) {
            return Some(yaml);
        }
        yaml.push_str(line);
        yaml.push('\n');
    }
}
314
/// Rank a type distribution and return its first `limit` entries: highest
/// count first, equal counts ordered by type name ascending.
///
/// A `limit` of zero (or an empty distribution) yields an empty vector; a
/// `limit` larger than the distribution returns every entry.
fn top_types(dist: &BTreeMap<String, usize>, limit: usize) -> Vec<(String, usize)> {
    let mut ranked: Vec<(&String, usize)> =
        dist.iter().map(|(name, count)| (name, *count)).collect();
    // Both ordering keys are spelled out so the result does not lean on the
    // map's iteration order.
    ranked.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));
    ranked
        .into_iter()
        .take(limit)
        .map(|(name, count)| (name.clone(), count))
        .collect()
}
325
#[cfg(test)]
mod tests {
    // End-to-end tests for `compute`: each test builds a small store in a
    // fresh temp directory, writes a handful of content files, and asserts on
    // the resulting `Stats`. The fixtures are deliberately tiny so every
    // expected count can be checked by eye.
    use super::*;
    use crate::parser::Config;
    use std::fs;
    use tempfile::TempDir;

    /// Build a `Store` rooted at a fresh tempdir with an empty `DB.md` marker.
    /// Bypasses `Store::open` by constructing the struct directly —
    /// `stats::compute` only reads `store.root`.
    ///
    /// The `TempDir` is returned alongside the store so the caller keeps it
    /// alive; dropping it removes the directory.
    fn temp_store() -> (TempDir, Store) {
        let dir = TempDir::new().expect("tempdir");
        fs::write(dir.path().join("DB.md"), "---\ntype: db-md\n---\n").expect("write DB.md");
        let store = Store {
            root: dir.path().to_path_buf(),
            config: Config::default(),
        };
        (dir, store)
    }

    /// Write a content file at a store-relative path, creating parent dirs.
    fn write_rel(store: &Store, rel: &str, contents: &str) {
        let abs = store.root.join(rel);
        if let Some(parent) = abs.parent() {
            fs::create_dir_all(parent).expect("mkdir parents");
        }
        fs::write(abs, contents).expect("write content file");
    }

    /// A minimal content file body: frontmatter with the given type, no links.
    fn doc(type_: &str, summary: &str) -> String {
        format!("---\ntype: {type_}\nsummary: \"{summary}\"\n---\n\nbody\n")
    }

    #[test]
    fn empty_store_is_all_zeros() {
        // Only the `DB.md` marker exists; no layer directories at all.
        let (_d, store) = temp_store();
        let s = compute(&store).expect("compute");
        assert_eq!(s.total_files, 0);
        assert_eq!(s.total_size_bytes, 0);
        assert!(s.files_per_layer.is_empty());
        assert!(s.type_distribution.is_empty());
        assert_eq!(s.orphan_count, 0);
        assert_eq!(s.broken_link_count, 0);
        assert!(s.top_types.is_empty());
    }

    #[test]
    fn counts_files_per_layer_and_total() {
        let (_d, store) = temp_store();
        write_rel(&store, "sources/emails/a.md", &doc("email", "a"));
        write_rel(&store, "sources/emails/b.md", &doc("email", "b"));
        write_rel(&store, "records/contacts/c.md", &doc("contact", "c"));
        write_rel(&store, "wiki/people/p.md", &doc("wiki-page", "p"));

        let s = compute(&store).expect("compute");
        assert_eq!(s.total_files, 4);
        assert_eq!(s.files_per_layer.get(&Layer::Sources), Some(&2));
        assert_eq!(s.files_per_layer.get(&Layer::Records), Some(&1));
        assert_eq!(s.files_per_layer.get(&Layer::Wiki), Some(&1));
    }

    #[test]
    fn ignores_meta_files_and_non_md_and_dotdirs_and_log() {
        let (_d, store) = temp_store();
        // Real content.
        write_rel(&store, "records/contacts/real.md", &doc("contact", "real"));
        // Meta + non-content that must NOT be counted.
        write_rel(
            &store,
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\n---\n",
        );
        write_rel(&store, "records/contacts/index.jsonl", "{}\n");
        write_rel(&store, "records/notes.txt", "not markdown\n");
        // `log/` archive tree under a layer is skipped wholesale.
        write_rel(&store, "sources/log/2026-04.md", &doc("email", "archived"));
        // Hidden dir contents are skipped.
        write_rel(
            &store,
            "wiki/.obsidian/cache.md",
            &doc("wiki-page", "hidden"),
        );

        let s = compute(&store).expect("compute");
        assert_eq!(s.total_files, 1, "only the one real content file counts");
        assert_eq!(s.files_per_layer.get(&Layer::Records), Some(&1));
        // Layers whose only files were skipped get no entry at all.
        assert_eq!(s.files_per_layer.get(&Layer::Sources), None);
        assert_eq!(s.files_per_layer.get(&Layer::Wiki), None);
    }

    #[test]
    fn total_size_is_sum_of_content_file_bytes() {
        let (_d, store) = temp_store();
        let a = doc("email", "a");
        let b = "---\ntype: contact\nsummary: x\n---\n\nlonger body text here\n".to_string();
        write_rel(&store, "sources/emails/a.md", &a);
        write_rel(&store, "records/contacts/b.md", &b);
        // A skipped file's bytes must not be included.
        write_rel(
            &store,
            "records/contacts/index.md",
            "---\ntype: index\n---\nbig meta file padding padding\n",
        );

        let s = compute(&store).expect("compute");
        // The fixtures are written verbatim, so on-disk size == string length.
        let expected = a.len() as u64 + b.len() as u64;
        assert_eq!(s.total_size_bytes, expected);
    }

    #[test]
    fn type_distribution_counts_each_type_value() {
        let (_d, store) = temp_store();
        write_rel(&store, "sources/emails/a.md", &doc("email", "a"));
        write_rel(&store, "sources/emails/b.md", &doc("email", "b"));
        write_rel(&store, "sources/emails/c.md", &doc("email", "c"));
        write_rel(&store, "records/contacts/d.md", &doc("contact", "d"));
        write_rel(&store, "records/proposals/e.md", &doc("proposal", "e"));

        let s = compute(&store).expect("compute");
        assert_eq!(s.type_distribution.get("email"), Some(&3));
        assert_eq!(s.type_distribution.get("contact"), Some(&1));
        assert_eq!(s.type_distribution.get("proposal"), Some(&1));
        assert_eq!(s.type_distribution.len(), 3);
    }

    #[test]
    fn file_without_type_is_counted_in_totals_but_not_distribution() {
        let (_d, store) = temp_store();
        // A content file with frontmatter but no `type:` key.
        write_rel(
            &store,
            "wiki/themes/x.md",
            "---\nsummary: no type here\n---\n\nbody\n",
        );
        // A content file with no frontmatter at all.
        write_rel(&store, "wiki/themes/y.md", "just a body, no frontmatter\n");

        let s = compute(&store).expect("compute");
        assert_eq!(s.total_files, 2, "untyped files still count toward totals");
        assert_eq!(s.files_per_layer.get(&Layer::Wiki), Some(&2));
        assert!(
            s.type_distribution.is_empty(),
            "no type key => no distribution entry, not an empty-string bucket"
        );
    }

    #[test]
    fn top_types_orders_by_count_desc_then_name_asc() {
        let (_d, store) = temp_store();
        // contact x3, email x3 (tie), decision x1.
        write_rel(&store, "records/contacts/c1.md", &doc("contact", "1"));
        write_rel(&store, "records/contacts/c2.md", &doc("contact", "2"));
        write_rel(&store, "records/contacts/c3.md", &doc("contact", "3"));
        write_rel(&store, "sources/emails/e1.md", &doc("email", "1"));
        write_rel(&store, "sources/emails/e2.md", &doc("email", "2"));
        write_rel(&store, "sources/emails/e3.md", &doc("email", "3"));
        write_rel(&store, "records/decisions/d1.md", &doc("decision", "1"));

        let s = compute(&store).expect("compute");
        assert_eq!(
            s.top_types,
            vec![
                ("contact".to_string(), 3),
                ("email".to_string(), 3),
                ("decision".to_string(), 1),
            ],
            "ties (contact, email both 3) break by name ascending; decision trails"
        );
    }

    #[test]
    fn top_types_is_capped_at_ten() {
        let (_d, store) = temp_store();
        // 12 distinct custom types, each one file.
        for i in 0..12 {
            let t = format!("type{i:02}");
            write_rel(&store, &format!("records/{t}/f.md"), &doc(&t, "x"));
        }
        let s = compute(&store).expect("compute");
        // The literal 10 is the current value of `TOP_TYPES_LIMIT`.
        assert_eq!(s.top_types.len(), 10, "top_types caps at 10");
        assert_eq!(
            s.type_distribution.len(),
            12,
            "distribution keeps all types"
        );
    }

    #[test]
    fn orphans_are_files_with_no_incoming_and_no_outgoing_links() {
        let (_d, store) = temp_store();
        // a -> b (a has outgoing, b has incoming). c is isolated => orphan.
        write_rel(
            &store,
            "records/contacts/a.md",
            "---\ntype: contact\nsummary: a\n---\n\nSee [[records/contacts/b]].\n",
        );
        write_rel(&store, "records/contacts/b.md", &doc("contact", "b"));
        write_rel(&store, "records/contacts/c.md", &doc("contact", "c"));

        let s = compute(&store).expect("compute");
        assert_eq!(s.orphan_count, 1, "only c is an orphan");
    }

    #[test]
    fn a_file_with_only_a_self_link_is_an_orphan_matching_graph() {
        let (_d, store) = temp_store();
        // A file that links only to ITSELF has no real graph edge, so it must be
        // an orphan — consistent with `graph::orphans` (which skips self-links).
        write_rel(
            &store,
            "records/contacts/solo.md",
            "---\ntype: contact\nsummary: solo\n---\n\nSee [[records/contacts/solo]].\n",
        );
        let s = compute(&store).expect("compute");
        assert_eq!(
            s.orphan_count, 1,
            "a self-only-linking file is an orphan: {s:?}"
        );
    }

    #[test]
    fn a_file_with_only_an_incoming_link_is_not_an_orphan() {
        let (_d, store) = temp_store();
        // b has no outgoing links, but a links to it => b is NOT an orphan.
        // a itself has an outgoing link => also not an orphan. Zero orphans.
        write_rel(
            &store,
            "wiki/people/a.md",
            "---\ntype: wiki-page\nsummary: a\n---\n\n[[wiki/people/b]]\n",
        );
        write_rel(&store, "wiki/people/b.md", &doc("wiki-page", "b"));

        let s = compute(&store).expect("compute");
        assert_eq!(s.orphan_count, 0);
    }

    #[test]
    fn frontmatter_wiki_links_count_as_edges_for_orphans() {
        let (_d, store) = temp_store();
        // The link lives in a frontmatter field, not the body. It must still
        // wire `contact` -> `company`, so neither is an orphan.
        write_rel(
            &store,
            "records/contacts/sarah.md",
            "---\ntype: contact\nsummary: s\ncompany: [[records/companies/acme]]\n---\n\nbody\n",
        );
        write_rel(&store, "records/companies/acme.md", &doc("company", "acme"));

        let s = compute(&store).expect("compute");
        assert_eq!(
            s.orphan_count, 0,
            "a frontmatter wiki-link is a real edge; neither endpoint is orphaned"
        );
    }

    #[test]
    fn broken_links_count_targets_that_do_not_exist() {
        let (_d, store) = temp_store();
        // Two links: one to an existing file, one to a missing file.
        write_rel(
            &store,
            "wiki/people/a.md",
            "---\ntype: wiki-page\nsummary: a\n---\n\n[[wiki/people/b]] and [[records/contacts/ghost]]\n",
        );
        write_rel(&store, "wiki/people/b.md", &doc("wiki-page", "b"));

        let s = compute(&store).expect("compute");
        assert_eq!(s.broken_link_count, 1, "only the ghost target is broken");
    }

    #[test]
    fn broken_link_resolves_with_md_extension_stripped() {
        let (_d, store) = temp_store();
        // Link written WITH a `.md` extension still resolves to the real file
        // (the parser accepts `.md`; validate only warns). Not broken.
        write_rel(
            &store,
            "wiki/people/a.md",
            "---\ntype: wiki-page\nsummary: a\n---\n\n[[wiki/people/b.md]]\n",
        );
        write_rel(&store, "wiki/people/b.md", &doc("wiki-page", "b"));

        let s = compute(&store).expect("compute");
        assert_eq!(
            s.broken_link_count, 0,
            "a `.md`-suffixed target resolves to the same node and is not broken"
        );
    }

    #[test]
    fn short_form_links_are_not_broken_and_do_not_wire_the_graph() {
        let (_d, store) = temp_store();
        // `[[b]]` is a short-form (no `/`): a validation error elsewhere, but
        // for stats it neither counts as broken (it doesn't resolve to a node)
        // nor wires `a` into the graph. So `a` (no other links) is an orphan.
        write_rel(
            &store,
            "records/contacts/a.md",
            "---\ntype: contact\nsummary: a\n---\n\n[[b]]\n",
        );
        write_rel(&store, "records/contacts/b.md", &doc("contact", "b"));

        let s = compute(&store).expect("compute");
        assert_eq!(
            s.broken_link_count, 0,
            "short-form links are not counted as broken by stats"
        );
        // a has only a short-form link (not an edge) => orphan. b has no links
        // and no real incoming edge => orphan. Both orphaned.
        assert_eq!(s.orphan_count, 2);
    }

    #[test]
    fn display_alias_links_resolve_to_the_target_not_the_alias() {
        let (_d, store) = temp_store();
        // `[[wiki/people/b|Bob]]` targets b, displays "Bob". The alias must be
        // stripped: the edge goes to b (exists), so it's not broken and b is
        // not an orphan.
        write_rel(
            &store,
            "wiki/people/a.md",
            "---\ntype: wiki-page\nsummary: a\n---\n\nmet [[wiki/people/b|Bob]] today\n",
        );
        write_rel(&store, "wiki/people/b.md", &doc("wiki-page", "b"));

        let s = compute(&store).expect("compute");
        assert_eq!(s.broken_link_count, 0, "alias target resolves and exists");
        assert_eq!(s.orphan_count, 0, "a links out, b is linked to");
    }

    #[test]
    fn duplicate_links_in_one_file_count_broken_per_occurrence() {
        let (_d, store) = temp_store();
        // The same missing target twice => two broken-link occurrences.
        write_rel(
            &store,
            "wiki/people/a.md",
            "---\ntype: wiki-page\nsummary: a\n---\n\n[[records/contacts/ghost]] [[records/contacts/ghost]]\n",
        );
        let s = compute(&store).expect("compute");
        assert_eq!(
            s.broken_link_count, 2,
            "broken links count occurrences, not distinct targets"
        );
    }

    #[test]
    fn markdown_links_are_not_treated_as_wiki_links() {
        let (_d, store) = temp_store();
        // A standard markdown link to an external URL must not register as a
        // wiki edge (so this file stays an orphan) nor as a broken link.
        write_rel(
            &store,
            "wiki/people/a.md",
            "---\ntype: wiki-page\nsummary: a\n---\n\nSee [Acme](https://acme.io/path).\n",
        );
        let s = compute(&store).expect("compute");
        assert_eq!(s.broken_link_count, 0, "markdown links aren't graph edges");
        assert_eq!(s.orphan_count, 1, "the file has no wiki-links => orphan");
    }

    #[test]
    fn regression_non_layer_multi_segment_link_is_not_broken() {
        // Finding #20: a target like `[[contacts/sarah-chen]]` omits the layer
        // prefix. It has a `/` but its first segment (`contacts`) is not a
        // recognized layer, so it's a short-form error in `validate`, NOT a
        // broken link. stats must agree: it counts neither as broken nor as an
        // outgoing edge. Pre-fix `is_full_path` (components().count() > 1)
        // accepted it and reported broken_link_count = 1.
        let (_d, store) = temp_store();
        write_rel(
            &store,
            "records/contacts/a.md",
            "---\ntype: contact\nsummary: a\n---\n\nSee [[contacts/sarah-chen]].\n",
        );
        let s = compute(&store).expect("compute");
        assert_eq!(
            s.broken_link_count, 0,
            "a non-layer multi-segment target is a short-form error, not broken"
        );
        // The non-layer link is not a graph edge, so `a` has no outgoing edge
        // and is an orphan — matching how validate/graph treat it.
        assert_eq!(
            s.orphan_count, 1,
            "the non-layer link does not wire `a` out of orphan status"
        );
    }

    #[test]
    fn regression_wiki_links_in_code_fences_are_ignored() {
        // Finding #21: a wiki-link that appears only inside a fenced code block
        // is illustrative syntax, not a graph edge. validate skips fenced
        // regions; stats must too. Pre-fix the regex ran over the whole file
        // with no fence tracking, so the fenced ghost link inflated
        // broken_link_count to 1 and the fenced real link un-orphaned the page.
        let (_d, store) = temp_store();
        // A howto page whose ONLY wiki-links live inside ``` and ~~~ fences:
        // one to a missing target, one to an existing target.
        write_rel(
            &store,
            "wiki/pages/howto.md",
            "---\ntype: wiki-page\nsummary: howto\n---\n\
             \nWrite links like this:\n\
             \n```\n[[records/contacts/ghost]]\n```\n\
             \nor this:\n\
             \n~~~\n[[wiki/pages/real]]\n~~~\n",
        );
        write_rel(&store, "wiki/pages/real.md", &doc("wiki-page", "real"));
        let s = compute(&store).expect("compute");
        assert_eq!(
            s.broken_link_count, 0,
            "a `[[...]]` inside a code fence is not a real (broken) edge"
        );
        // howto has no real edges => orphan. real is not linked-to by any real
        // edge => orphan. Both orphaned (2), proving the fenced link to `real`
        // did not wire either file out of orphan status.
        assert_eq!(
            s.orphan_count, 2,
            "fenced wiki-links do not wire files out of orphan status: {s:?}"
        );
    }

    #[test]
    fn a_link_to_an_existing_file_in_another_layer_resolves() {
        let (_d, store) = temp_store();
        // wiki page links to a source file in a different layer; cross-layer
        // full-path links resolve like any other.
        write_rel(
            &store,
            "wiki/people/a.md",
            "---\ntype: wiki-page\nsummary: a\n---\n\nfrom [[sources/emails/2026/05/m]]\n",
        );
        write_rel(&store, "sources/emails/2026/05/m.md", &doc("email", "m"));

        let s = compute(&store).expect("compute");
        assert_eq!(s.broken_link_count, 0);
        assert_eq!(s.orphan_count, 0, "both endpoints are wired");
    }
}
765}