Skip to main content

dbmd_core/
graph.rs

1//! `graph` — the wiki-link **relationship layer**.
2//!
3//! Wiki-links are curated-relevance edges (the LLM wrote them), so the graph's
4//! job is to **assemble the relevant context around a seed**, not to be
5//! analyzed. **All ops are on-demand — there is no maintained graph** (a
6//! persistent graph is the roadmap engine).
7//!
8//! [`backlinks`] / [`forwardlinks`] are loop ops (O(changed), never O(store)).
9//! [`neighborhood`] is the high-value context-hydration op. [`orphans`] is a
10//! SWEEP curation worklist.
11//!
12//! Whole-graph analytics (connected components, cycle detection, shortest
13//! path, sinks/sources, DOT/JSON export) are deliberately **not** here — a
14//! human studying the graph opens the store in Obsidian; broken-link detection
15//! is [`crate::validate`]'s job (`WIKI_LINK_BROKEN`).
16//!
17//! ## Implementation note — two paths for the incoming-edge scan
18//!
19//! The scale contract (SPEC § Tooling, plan: *"the interactive loop is
20//! O(changed), never O(store)"*) is the load-bearing rule here. [`backlinks`]
21//! is a loop op, so it must **not** open and `read_to_string` every content file
22//! in the store on each call. It resolves incoming edges by one of two paths,
23//! chosen by whether the call is scoped:
24//!
25//! - **Unscoped** (`dbmd graph backlinks <x>`, no `--type`/`--in`): one
26//!   embedded-ripgrep pass for the literal `[[<target>]]` over the tree, via
27//!   [`Store::find_links_to`] (`grep` + `ignore`, early-exit per file) — the
28//!   same scan engine [`crate::validate`]'s working-set incoming-linker step
29//!   uses. A single store traversal with cheap presence-only matching, not N
30//!   whole-file parses; that is what keeps the unscoped call inside the loop
31//!   budget. [`backlinks`] then filters the raw hits to content files and emits
32//!   canonical bare targets (its relationship view), where the lower-level
33//!   [`Store::find_links_to`] returns every `.md` the text appears in.
34//! - **Scoped** (`--type` / `--in`): the candidate set is enumerated from the
35//!   relevant layer's `index.jsonl` sidecars — the sidecars of the one layer the
36//!   `--type` belongs to (via [`Store::sidecar_records`]), filtered to that type
37//!   — and each candidate is confirmed by a single-file parse. That is what makes
38//!   `--type` / `--in` an *I/O* scope, not just a result filter: a typed/layer-scoped
39//!   `backlinks` reads only the relevant layer's sidecars (O(entities-in-layer))
40//!   and parses only those files. A type's records can span several folders within
41//!   its layer (`wiki-page` under any `wiki/<topic>/`), so the read is layer-wide,
42//!   not a single canonical folder — otherwise off-canonical-folder linkers would
43//!   be silently dropped.
44//!
45//! **Why the scoped path confirms by parsing the candidate, not by trusting the
46//! sidecar's `links` field.** A sidecar record's `links` is the file's
47//! *frontmatter* `links:` list only — it does **not** capture wiki-links written
48//! in the body or inside other typed frontmatter fields (`company: [[…]]`,
49//! `attendees: [ … ]`, `derived_from: [ … ]`). [`forwardlinks`] extracts edges
50//! from the whole file, so to keep the two directions on the **same** edge set
51//! (an incoming edge to X is exactly: some file whose [`forwardlinks`] contains
52//! X) the incoming-edge confirmation re-parses each candidate file the same way.
53//! The sidecar bounds *which* files are candidates; the parse decides whether
54//! each truly links. The unscoped ripgrep path stays on that same edge set by
55//! matching the link text wherever it lives in the file (frontmatter or body).
56//! A node's `summary` / `type` likewise read frontmatter directly (the source of
57//! truth the sidecar is derived from; never stale).
58
59use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
60use std::io;
61use std::path::{Path, PathBuf};
62
63use ignore::WalkBuilder;
64use regex::Regex;
65
66use crate::index::IndexRecord;
67use crate::store::{layer_for_type, Layer, Store, StoreError};
68
69/// Which edge directions a traversal follows.
70#[derive(Debug, Clone, Copy, PartialEq, Eq)]
71pub enum Direction {
72    /// Incoming edges only (backlinks).
73    Incoming,
74    /// Outgoing edges only (forwardlinks).
75    Outgoing,
76    /// Both directions.
77    Both,
78}
79
80/// One node reached during a [`neighborhood`] hydration: the file, its
81/// `summary`, and how it connects back toward the seed.
82#[derive(Debug, Clone, PartialEq, Eq)]
83pub struct ContextNode {
84    /// The store-relative path of the reached file.
85    pub path: PathBuf,
86    /// The file's `summary` (read from its sidecar entry / frontmatter).
87    pub summary: String,
88    /// The file's `type`, when known.
89    pub type_: Option<String>,
90    /// Hop distance from the seed (the seed itself is 0).
91    pub hops: u32,
92    /// The relationship edge that brought this node into the slice: the path it
93    /// links to/from one hop closer to the seed, and the direction.
94    pub via: Option<(PathBuf, Direction)>,
95}
96
97/// The readable working-set digest [`neighborhood`] returns: the seed plus the
98/// reached nodes with their summaries and connections. The relationship-axis
99/// "turn a seed into context" primitive.
100#[derive(Debug, Clone, PartialEq, Eq)]
101pub struct ContextSlice {
102    /// The seed the slice was hydrated from.
103    pub seed: PathBuf,
104    /// The reached nodes (excluding the seed), in BFS order.
105    pub nodes: Vec<ContextNode>,
106}
107
108/// Incoming edges to `path`: files that wiki-link to it. The blast-radius /
109/// dependents primitive before an edit. Store-wide (every layer / every type);
110/// see [`backlinks_filtered`] for the `--type` / `--in`-scoped form.
111///
112/// `path` is the store-relative target as it would be written inside a
113/// wiki-link (with or without a trailing `.md`; both resolve to the same
114/// target). Returns each linking file as its **canonical bare wiki-link path**
115/// (store-relative, no `.md`) — the same key [`forwardlinks`] emits, so the two
116/// directions round-trip and [`neighborhood`] can use one node identity.
117/// Deduped, sorted, never including the seed itself.
118pub fn backlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
119    backlinks_filtered(store, path, &[], None)
120}
121
122/// Incoming edges to `path`, scoped by the linking file's `type` and/or layer —
123/// the `dbmd graph backlinks --type/--in` surface.
124///
125/// **Scale (the loop contract).** Two paths, by whether the call is scoped:
126///
127/// - **Unscoped** (`types` empty *and* `layer` `None`): one embedded-ripgrep
128///   pass for `[[<target>]]` across the store via [`Store::find_links_to`] — a
129///   single `grep` + `ignore` traversal with early-exit per file, never a
130///   `read_to_string` of every content file. This is the same scan engine
131///   [`crate::validate::validate_working_set`]'s incoming-linker step rides, and
132///   it keeps the unscoped call inside the loop budget (the old per-candidate
133///   confirm-read re-opened every file in the store → O(store)).
134/// - **Scoped** (`types` and/or `layer` set): the candidate set — the files that
135///   *might* link to `path` — is read from the relevant layer's `index.jsonl`
136///   sidecars, so the call touches only the named layer(s): O(entities-in-layer),
137///   the sanctioned loop cost. Each candidate is then confirmed by a single-file
138///   parse. When `types` lists several types, the sidecars of each type's layer
139///   are read and the candidate sets unioned (filtered to the type), so a type
140///   whose records span multiple folders within its layer (e.g. `wiki-page` under
141///   any `wiki/<topic>/`) is fully covered; a `layer` further restricts the
142///   candidate paths to that layer.
143///
144/// **Correctness (one edge set, both paths).** An incoming edge to X is exactly:
145/// some file whose [`forwardlinks`] contains X — a wiki-link in the body or in
146/// *any* frontmatter field (`company: [[…]]`, `attendees: [ … ]`), not just the
147/// sidecar's frontmatter `links:` projection. Both paths honor that:
148/// - The unscoped scan matches the literal `[[<target>]]` text wherever it lives
149///   in a file (frontmatter or body), the same edges [`forwardlinks`] extracts.
150///   [`Store::find_links_to`] returns *every* `.md` carrying the link text
151///   (including `index.md` catalogs); [`backlinks`] is the relationship view, so
152///   the results are filtered to content files ([`is_content_rel`]) and emitted
153///   as canonical bare targets, self-excluded.
154/// - The scoped path confirms each candidate via [`file_links_to`], which
155///   delegates to [`forwardlinks`] (body + every frontmatter field) — so a
156///   body-only or typed-field edge is caught, not just the sidecar's `links:`
157///   list.
158///
159/// Result form (canonical bare paths, deduped, sorted, seed excluded) is
160/// identical on both paths and matches [`backlinks`].
161pub fn backlinks_filtered(
162    store: &Store,
163    path: &Path,
164    types: &[String],
165    layer: Option<Layer>,
166) -> Result<Vec<PathBuf>, StoreError> {
167    let target = normalize_target(path);
168    if target.is_empty() {
169        return Ok(Vec::new());
170    }
171
172    // Unscoped: one embedded-ripgrep pass over the store (O(store) scan with
173    // early-exit per file), not a per-candidate read of every content file.
174    // `find_links_to` returns every `.md` carrying the link text (incl. catalog
175    // `index.md`); narrow to content files and canonicalize to the bare target
176    // form `backlinks` emits, dropping the seed's self-link.
177    if types.is_empty() && layer.is_none() {
178        let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
179        for rel in store.find_links_to(path)? {
180            if !is_content_rel(&rel) {
181                continue;
182            }
183            let linker = normalize_target(&rel);
184            if linker.is_empty() || linker == target {
185                // A file never counts as its own backlink.
186                continue;
187            }
188            hits.insert(PathBuf::from(linker));
189        }
190        return Ok(hits.into_iter().collect());
191    }
192
193    // Scoped: read only the named folder(s)' sidecars for the candidate set, then
194    // confirm each candidate with a single-file parse — O(folder), the I/O scope
195    // `--type` / `--in` buys.
196    let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
197    for candidate in candidate_records(store, types, layer)? {
198        let rel = &candidate.path;
199        let candidate_target = normalize_target(rel);
200        if candidate_target.is_empty() || candidate_target == target {
201            // A file never counts as its own backlink.
202            continue;
203        }
204        // Confirm the edge by parsing the candidate file the same way
205        // forwardlinks does (body + all frontmatter), so body/typed-field links
206        // are caught — the sidecar's `links` field alone would miss them.
207        if file_links_to(store, rel, &target)? {
208            hits.insert(PathBuf::from(candidate_target));
209        }
210    }
211
212    Ok(hits.into_iter().collect())
213}
214
215/// Outgoing edges from `path`: the wiki-link targets extracted from that single
216/// file. Loop-fast; follow the evidence chain.
217///
218/// `path` is the store-relative path of the file to read. Targets are returned
219/// as store-relative paths (bare, no `.md`), deduped and sorted; the file's
220/// links to itself are dropped. A missing file yields an empty list (a
221/// dangling seed has no outgoing edges to report — broken-link detection is
222/// [`crate::validate`]'s job).
223pub fn forwardlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
224    let self_target = normalize_target(path);
225    let abs = match resolve_existing(store, path) {
226        Some(a) => a,
227        None => return Ok(Vec::new()),
228    };
229    let body = match std::fs::read_to_string(&abs) {
230        Ok(b) => b,
231        // A file that isn't valid UTF-8 (e.g. a binary source) carries no
232        // wiki-links we can extract.
233        Err(e) if e.kind() == io::ErrorKind::InvalidData => return Ok(Vec::new()),
234        Err(e) => return Err(StoreError::Io(e)),
235    };
236
237    let mut out: BTreeSet<PathBuf> = BTreeSet::new();
238    for target in extract_link_targets(&body) {
239        if target.is_empty() || target == self_target {
240            continue;
241        }
242        out.insert(PathBuf::from(target));
243    }
244    Ok(out.into_iter().collect())
245}
246
247/// The candidate set for an incoming-edge scan: the sidecar records that could
248/// link to the target, read from the type-folder `index.jsonl` sidecars (never
249/// a content-tree walk). `types`/`layer` narrow *which* sidecars are read — the
250/// I/O scope that keeps a typed/layer backlinks O(entities-in-layer).
251///
252/// - `types` non-empty: for each type, read **the whole layer** the type belongs
253///   to ([`layer_for_type`] → [`Store::sidecar_records`]) and keep the records of
254///   that `type`, unioned by path across the requested types. A `layer` filter,
255///   when given, intersects with the type's own layer (a type lives in exactly
256///   one layer, so a mismatched `--in` simply yields no candidates).
257/// - `types` empty: every sidecar record under `layer` (or store-wide when
258///   `None`) via [`Store::sidecar_records`].
259///
260/// **Why the whole layer, not just the type's canonical folder.** A `type` can
261/// legitimately span several folders within one layer — `wiki-page` is the
262/// canonical case (SPEC files it under `wiki/<topic>/` for an *arbitrary* topic:
263/// `wiki/topics/`, `wiki/people/`, `wiki/projects/`, …). Reading only the
264/// single canonical-guess folder (`wiki/topics/`) would silently drop every
265/// wiki-page filed elsewhere in the layer, so a scoped `backlinks --type
266/// wiki-page` would under-report dependents the moment that canonical folder
267/// exists — breaking the docstring's promise that the scoped edge set equals the
268/// unscoped one. Reading the type's full layer subtree and filtering by `type`
269/// is complete and still O(entities-in-layer), the sanctioned loop scope.
270fn candidate_records(
271    store: &Store,
272    types: &[String],
273    layer: Option<Layer>,
274) -> Result<Vec<IndexRecord>, StoreError> {
275    if types.is_empty() {
276        return store.sidecar_records(layer);
277    }
278    let mut by_path: std::collections::BTreeMap<PathBuf, IndexRecord> =
279        std::collections::BTreeMap::new();
280    for type_ in types {
281        // A type lives in exactly one layer; read that whole layer's sidecars so
282        // a record filed under a non-canonical folder of the same type (e.g. a
283        // `wiki-page` under `wiki/people/` rather than `wiki/topics/`) is still a
284        // candidate. An explicit `--in` layer that disagrees with the type's
285        // layer can never match the type, so skip the read entirely.
286        let type_layer = layer_for_type(type_);
287        if let Some(scope) = layer {
288            if scope != type_layer {
289                continue;
290            }
291        }
292        for rec in store.sidecar_records(Some(type_layer))? {
293            if rec.type_ == *type_ {
294                by_path.insert(rec.path.clone(), rec);
295            }
296        }
297    }
298    Ok(by_path.into_values().collect())
299}
300
301/// True if the store file at `rel` carries a wiki-link whose canonical target
302/// equals `target`. Delegates to [`forwardlinks`] so the incoming-edge predicate
303/// is *exactly* the outgoing-edge extraction — body + every frontmatter field —
304/// keeping the two directions on one edge set. `forwardlinks` already emits
305/// canonical bare targets, so `target` (likewise normalized by the caller) is
306/// compared directly. A missing/binary file links to nothing.
307fn file_links_to(store: &Store, rel: &Path, target: &str) -> Result<bool, StoreError> {
308    let edges = forwardlinks(store, rel)?;
309    Ok(edges.iter().any(|e| e.as_os_str() == target))
310}
311
312/// **Context hydration.** Bounded BFS from `seed` over backlinks + forwardlinks
313/// out to `hops`, reading each reached file's `summary` + relationship, and
314/// returning a readable [`ContextSlice`]. Optionally filtered by `types` and
315/// `direction`. On-demand; no maintained graph. What the agent reaches for to
316/// assemble a working set in one call.
317///
318/// Traversal semantics:
319/// - **`hops`** bounds true graph distance from the seed. `hops == 0` returns
320///   an empty slice (the seed alone is no context).
321/// - **`direction`** selects which edges are followed: `Incoming` walks
322///   backlinks, `Outgoing` walks forwardlinks, `Both` walks the union.
323/// - **`types`**, when non-empty, filters which reached nodes appear in the
324///   slice — but traversal still passes *through* off-type nodes, so a
325///   `meeting` two hops out is still reachable through a `contact` even when
326///   filtering to `meeting`. (An empty `types` slice imposes no filter.)
327/// - Each node records the lowest hop count at which it is first reached (BFS
328///   order); the seed is never included as a node.
329///
330/// Unbounded traversal: delegates to [`neighborhood_capped`] with no node cap, so
331/// it expands every reachable node within `hops`. For a densely-interlinked store
332/// this is one full-store backlinks scan **per reached node** (O(visited × store))
333/// — prefer [`neighborhood_capped`] with a `max_nodes` cap to bound that work.
334pub fn neighborhood(
335    store: &Store,
336    seed: &Path,
337    hops: u32,
338    types: &[String],
339    direction: Direction,
340) -> Result<ContextSlice, StoreError> {
341    neighborhood_capped(store, seed, hops, types, direction, None)
342}
343
344/// [`neighborhood`] with a hard cap on how many nodes the BFS **traverses**.
345///
346/// `max_nodes` bounds the *traversal*, not just the result: each node the BFS
347/// expands triggers a per-node incoming-edge scan (an unscoped [`backlinks`] is a
348/// full-store ripgrep pass), so an uncapped neighborhood of a hub node costs
349/// O(visited × store). A post-hoc `.take(n)` on the returned nodes caps the
350/// *output* but not that work — the scans still run for every reached node. This
351/// cap stops discovering (and therefore stops scanning) once `max_nodes` distinct
352/// non-seed nodes have entered the BFS, so the expensive per-node scans are bounded
353/// to at most `max_nodes` of them. `None` is unbounded (the [`neighborhood`]
354/// behavior).
355///
356/// The cap is applied at *discovery* in BFS order, so the kept nodes are exactly
357/// the first `max_nodes` reached (closest-first by hop), and each still records its
358/// true minimum hop distance. Type-filtered (off-type) nodes count against the cap
359/// because the BFS must still traverse *through* them to reach deeper on-type
360/// nodes — the scan cost is paid when a node is expanded, on- or off-type alike.
361pub fn neighborhood_capped(
362    store: &Store,
363    seed: &Path,
364    hops: u32,
365    types: &[String],
366    direction: Direction,
367    max_nodes: Option<usize>,
368) -> Result<ContextSlice, StoreError> {
369    let seed_rel = PathBuf::from(normalize_target(seed));
370    let type_filter: HashSet<&str> = types.iter().map(|s| s.as_str()).collect();
371
372    // `discovered` guards against revisiting a node (and against re-adding the
373    // seed). BFS by levels so the first time we reach a node is its true min
374    // hop distance.
375    let mut discovered: HashSet<PathBuf> = HashSet::new();
376    discovered.insert(seed_rel.clone());
377
378    let mut nodes: Vec<ContextNode> = Vec::new();
379    let mut frontier: VecDeque<PathBuf> = VecDeque::new();
380    frontier.push_back(seed_rel.clone());
381
382    // Count of distinct non-seed nodes admitted to the BFS. Once it hits
383    // `max_nodes` we stop discovering new nodes, which stops enqueuing them, which
384    // stops the per-node full-store backlinks scan they would have triggered — the
385    // cap bounds the *traversal cost*, not only the printed result.
386    let mut admitted = 0usize;
387    let cap_reached = |admitted: usize| max_nodes.is_some_and(|cap| admitted >= cap);
388
389    let mut hop = 0u32;
390    while hop < hops && !frontier.is_empty() && !cap_reached(admitted) {
391        hop += 1;
392        let level_size = frontier.len();
393        for _ in 0..level_size {
394            if cap_reached(admitted) {
395                break;
396            }
397            let current = frontier.pop_front().expect("frontier non-empty");
398
399            // Collect this node's edges in the requested direction(s). Each
400            // edge carries the neighbor path + the direction we traversed it.
401            let mut edges: Vec<(PathBuf, Direction)> = Vec::new();
402            if matches!(direction, Direction::Outgoing | Direction::Both) {
403                for nbr in forwardlinks(store, &current)? {
404                    edges.push((nbr, Direction::Outgoing));
405                }
406            }
407            if matches!(direction, Direction::Incoming | Direction::Both) {
408                for nbr in backlinks(store, &current)? {
409                    edges.push((nbr, Direction::Incoming));
410                }
411            }
412
413            for (neighbor, dir) in edges {
414                if cap_reached(admitted) {
415                    break;
416                }
417                if !discovered.insert(neighbor.clone()) {
418                    continue;
419                }
420                admitted += 1;
421                let (summary, type_) = read_summary_and_type(store, &neighbor);
422                let include = type_filter.is_empty()
423                    || type_
424                        .as_deref()
425                        .map(|t| type_filter.contains(t))
426                        .unwrap_or(false);
427                if include {
428                    nodes.push(ContextNode {
429                        path: neighbor.clone(),
430                        summary,
431                        type_,
432                        hops: hop,
433                        via: Some((current.clone(), dir)),
434                    });
435                }
436                // Off-type nodes are not emitted but still seed the next BFS
437                // level, so the type filter narrows the *result*, not the
438                // reachable graph.
439                frontier.push_back(neighbor);
440            }
441        }
442    }
443
444    Ok(ContextSlice {
445        seed: seed_rel,
446        nodes,
447    })
448}
449
450/// **SWEEP.** Content files with no incoming AND no outgoing wiki-links — the
451/// curation worklist ("ingested but not yet wired into the wiki"). Off the
452/// loop. Optionally scoped to a layer.
453///
454/// A file is an orphan iff it neither links out to another store file nor is
455/// linked to by one. Incoming edges are counted across the *whole* store
456/// (a link from any layer un-orphans a file), even when `layer` scopes the
457/// candidate set. Returns store-relative paths, sorted.
458pub fn orphans(store: &Store, layer: Option<Layer>) -> Result<Vec<PathBuf>, StoreError> {
459    // One walk of the whole store: for every content file, record (a) whether
460    // it has any outgoing link, and (b) accumulate the set of every target any
461    // file links to (its incoming-edge set). Both come from a single read per
462    // file — the SWEEP cost.
463    let all = walk_content_files(store)?;
464
465    let mut linked_to: HashSet<PathBuf> = HashSet::new();
466    let mut has_outgoing: HashMap<PathBuf, bool> = HashMap::new();
467
468    for abs in &all {
469        let rel = match rel_path(store, abs) {
470            Some(r) => r,
471            None => continue,
472        };
473        let self_target = normalize_target(&rel);
474
475        let body = match std::fs::read_to_string(abs) {
476            Ok(b) => b,
477            Err(e) if e.kind() == io::ErrorKind::InvalidData => String::new(),
478            Err(e) => return Err(StoreError::Io(e)),
479        };
480
481        let mut outgoing = false;
482        for target in extract_link_targets(&body) {
483            if target.is_empty() || target == self_target {
484                continue;
485            }
486            if resolve_existing(store, Path::new(&target)).is_none() {
487                continue;
488            }
489            outgoing = true;
490            linked_to.insert(PathBuf::from(target));
491        }
492        has_outgoing.insert(rel, outgoing);
493    }
494
495    let mut out: BTreeSet<PathBuf> = BTreeSet::new();
496    for abs in &all {
497        let rel = match rel_path(store, abs) {
498            Some(r) => r,
499            None => continue,
500        };
501        if let Some(layer) = layer {
502            if path_layer(&rel) != Some(layer) {
503                continue;
504            }
505        }
506        let outgoing = has_outgoing.get(&rel).copied().unwrap_or(false);
507        let incoming = linked_to.contains(&PathBuf::from(normalize_target(&rel)));
508        if !outgoing && !incoming {
509            out.insert(rel);
510        }
511    }
512
513    Ok(out.into_iter().collect())
514}
515
516/// **Write-side.** Rewrite every incoming `[[old]]` wiki-link in `text` to
517/// `[[new]]`, preserving any `|display` override and emitting the canonical bare
518/// target (no `.md`). The write-side twin of [`backlinks`]: where `backlinks`
519/// *finds* the files carrying an edge to `old`, this *retargets* that edge to
520/// `new` inside one file's contents.
521///
522/// `old` and `new` are store-relative paths in the wiki-link sense — both are
523/// passed through the same [`normalize_target`] the read side keys on, so the
524/// `.md` and bare spellings of `old` collapse to one target and a match here is
525/// exactly a match [`backlinks`] / [`Store::find_links_to`](crate::Store::find_links_to)
526/// would report. A link is rewritten iff its normalized target equals
527/// `normalize_target(old)`; prefix collisions (`old=a/b` vs `[[a/bc]]`) and
528/// short-form links never match. Returns the rewritten text (identical to the
529/// input when nothing matched), so the caller can cheaply detect a no-op.
530///
531/// Operates on the raw bytes (not a parser round-trip) so a link in frontmatter
532/// or body is retargeted uniformly and nothing else is reflowed.
533pub fn rewrite_links_to(text: &str, old: &Path, new: &Path) -> String {
534    let old_target = normalize_target(old);
535    let new_target = normalize_target(new);
536    if old_target.is_empty() {
537        // No target to match → never rewrite anything.
538        return text.to_string();
539    }
540
541    let re = rewrite_link_re();
542    let mut out = String::with_capacity(text.len());
543    let mut last = 0usize;
544    for caps in re.captures_iter(text) {
545        let whole = caps.get(0).expect("group 0 always present");
546        let raw_target = caps.get(1).map(|m| m.as_str()).unwrap_or("");
547        // Match on the SAME normalized key the read side uses, so `[[old]]`,
548        // `[[old.md]]`, and `[[./old]]` all retarget and a prefix like
549        // `[[old-jr]]` never does.
550        if normalize_target(Path::new(raw_target)) != old_target {
551            continue;
552        }
553        // Copy the gap since the previous rewrite verbatim, then the rebuilt
554        // link: canonical bare new target + the original display, if any.
555        out.push_str(&text[last..whole.start()]);
556        out.push_str("[[");
557        out.push_str(&new_target);
558        if let Some(display) = caps.get(2) {
559            out.push('|');
560            out.push_str(display.as_str());
561        }
562        out.push_str("]]");
563        last = whole.end();
564    }
565    out.push_str(&text[last..]);
566    out
567}
568
569// ── Private helpers ─────────────────────────────────────────────────────────
570
571/// The wiki-link regex used by the **write side** ([`rewrite_links_to`]):
572/// captures the target (group 1) AND the optional `|display` (group 2) so a
573/// rewrite can re-emit the display verbatim. The target/display character
574/// classes match [`wiki_link_re`] exactly, so the write side recognizes a link
575/// iff the read side does — the two never disagree on what a `[[…]]` is.
576///
577/// Compiled once for the process via [`OnceLock`] — `rewrite_links_to` runs on
578/// the rename/link write path, so recompiling per call would be wasteful.
579fn rewrite_link_re() -> &'static Regex {
580    static RE: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
581    RE.get_or_init(|| {
582        Regex::new(r"\[\[([^\]\|\n]+?)(?:\|([^\]\n]*))?\]\]")
583            .expect("static wiki-link rewrite regex compiles")
584    })
585}
586
587/// Normalize a store-relative path into the canonical wiki-link target form:
588/// forward slashes, no leading `./` or `/`, and no trailing `.md`. This is the
589/// single key that incoming/outgoing edges are compared on, so the `.md` and
590/// bare forms of a target unify.
591fn normalize_target(path: &Path) -> String {
592    let mut s = path.to_string_lossy().replace('\\', "/");
593    while let Some(rest) = s.strip_prefix("./") {
594        s = rest.to_string();
595    }
596    let s = s.trim_start_matches('/');
597    let s = s.strip_suffix(".md").unwrap_or(s);
598    s.trim().to_string()
599}
600
601/// The wiki-link regex: `[[target]]` / `[[target|display]]`. Captures the raw
602/// target (group 1). Compiled once for the process via [`OnceLock`] —
603/// `extract_link_targets` runs on the O(changed) loop path (backlinks /
604/// forwardlinks / neighborhood), so the compile must not repeat per call.
605fn wiki_link_re() -> &'static Regex {
606    // target = anything up to the first `|` or `]`. display (optional) is
607    // discarded. Matches across a single line/body slice.
608    static RE: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
609    RE.get_or_init(|| {
610        Regex::new(r"\[\[([^\]\|\n]+?)(?:\|[^\]\n]*)?\]\]")
611            .expect("static wiki-link regex compiles")
612    })
613}
614
615/// Extract every wiki-link target from a body, normalized to the canonical
616/// store-relative form. Order-preserving; duplicates kept (callers dedup).
617fn extract_link_targets(body: &str) -> Vec<String> {
618    let re = wiki_link_re();
619    re.captures_iter(body)
620        .filter_map(|c| c.get(1))
621        .map(|m| normalize_target(Path::new(m.as_str().trim())))
622        .filter(|t| !t.is_empty())
623        .collect()
624}
625
626/// Resolve the store root + a store-relative path to the absolute on-disk file,
627/// trying the path as written and then with a `.md` extension. `None` if
628/// neither exists.
629fn resolve_existing(store: &Store, store_relative: &Path) -> Option<PathBuf> {
630    let direct = store.root.join(store_relative);
631    if direct.is_file() {
632        return Some(direct);
633    }
634    let normalized = normalize_target(store_relative);
635    let with_md = store.root.join(format!("{normalized}.md"));
636    if with_md.is_file() {
637        return Some(with_md);
638    }
639    None
640}
641
642/// Convert an absolute path under the store root into its store-relative form.
643fn rel_path(store: &Store, abs: &Path) -> Option<PathBuf> {
644    abs.strip_prefix(&store.root).ok().map(|p| p.to_path_buf())
645}
646
647/// Which layer a store-relative path sits in, by its first component.
648fn path_layer(rel: &Path) -> Option<Layer> {
649    let first = rel.components().next()?;
650    match first.as_os_str().to_str()? {
651        "sources" => Some(Layer::Sources),
652        "records" => Some(Layer::Records),
653        "wiki" => Some(Layer::Wiki),
654        _ => None,
655    }
656}
657
658/// True if a store-relative path is a *content* file: under `sources/`,
659/// `records/`, or `wiki/`, a `.md` file, and not an `index.md`. Meta files
660/// (`DB.md`, `log.md`, `log/…`, sidecars) are excluded.
661fn is_content_rel(rel: &Path) -> bool {
662    if path_layer(rel).is_none() {
663        return false;
664    }
665    match rel.extension().and_then(|e| e.to_str()) {
666        Some("md") => {}
667        _ => return false,
668    }
669    rel.file_name().and_then(|n| n.to_str()) != Some("index.md")
670}
671
672/// Walk every content `.md` file in the store via the **`ignore`** walker
673/// (the ripgrep directory engine). Only the three layer roots
674/// (`sources/`/`records/`/`wiki/`) are descended, so `DB.md`, `log.md`, and
675/// `log/` at the store root are structurally never reached; hidden dirs and
676/// per-folder `index.md` sidecars are filtered out ([`is_content_rel`]). Honors
677/// `.gitignore` the way `rg` does. Returns absolute paths. SWEEP-class.
678fn walk_content_files(store: &Store) -> Result<Vec<PathBuf>, StoreError> {
679    let mut out = Vec::new();
680    for layer in Layer::all() {
681        let dir = store.root.join(layer_dir_name(layer));
682        if !dir.is_dir() {
683            continue;
684        }
685        let walker = WalkBuilder::new(&dir)
686            .hidden(true)
687            .git_ignore(true)
688            .git_global(false)
689            .require_git(false)
690            .build();
691        for result in walker {
692            let entry = result.map_err(|e| StoreError::Search {
693                root: store.root.clone(),
694                message: format!("walk failed: {e}"),
695            })?;
696            if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
697                continue;
698            }
699            let abs = entry.into_path();
700            if let Some(rel) = rel_path(store, &abs) {
701                if is_content_rel(&rel) {
702                    out.push(abs);
703                }
704            }
705        }
706    }
707    Ok(out)
708}
709
710/// The on-disk folder name for a layer. Mirrors `Layer::dir_name`; kept local
711/// so the graph module owns its own copy rather than coupling to that body.
712fn layer_dir_name(layer: Layer) -> &'static str {
713    match layer {
714        Layer::Sources => "sources",
715        Layer::Records => "records",
716        Layer::Wiki => "wiki",
717    }
718}
719
720/// Read a reached node's `summary` and `type` from its frontmatter. A missing
721/// file, missing frontmatter, or unparseable YAML degrades to an empty summary
722/// / unknown type rather than failing the whole hydration — `neighborhood` is
723/// best-effort context assembly, not validation.
724fn read_summary_and_type(store: &Store, rel: &Path) -> (String, Option<String>) {
725    let abs = match resolve_existing(store, rel) {
726        Some(a) => a,
727        None => return (String::new(), None),
728    };
729    let text = match std::fs::read_to_string(&abs) {
730        Ok(t) => t,
731        Err(_) => return (String::new(), None),
732    };
733    let yaml = match frontmatter_block(&text) {
734        Some(y) => y,
735        None => return (String::new(), None),
736    };
737    let value: serde_norway::Value = match serde_norway::from_str(yaml) {
738        Ok(v) => v,
739        Err(_) => return (String::new(), None),
740    };
741    let summary = value
742        .get("summary")
743        .and_then(|v| v.as_str())
744        .unwrap_or("")
745        .to_string();
746    let type_ = value
747        .get("type")
748        .and_then(|v| v.as_str())
749        .map(|s| s.to_string());
750    (summary, type_)
751}
752
753/// Return the YAML between the opening and closing `---` fences (exclusive), or
754/// `None` if the text has no leading frontmatter block. Local mirror of the
755/// parser's split so the graph module stays self-contained.
756fn frontmatter_block(text: &str) -> Option<&str> {
757    // Tolerate a single leading UTF-8 BOM, matching parser/store/index/validate.
758    let text = text.strip_prefix('\u{feff}').unwrap_or(text);
759    let rest = text
760        .strip_prefix("---\n")
761        .or_else(|| text.strip_prefix("---\r\n"))?;
762    // Find the closing fence: a line that is exactly `---`.
763    let mut idx = 0usize;
764    for line in rest.split_inclusive('\n') {
765        let trimmed = line.trim_end_matches(['\r', '\n']);
766        if trimmed == "---" {
767            return Some(&rest[..idx]);
768        }
769        idx += line.len();
770    }
771    None
772}
773
774#[cfg(test)]
775mod tests {
776    use super::*;
777    use std::fs;
778    use tempfile::TempDir;
779
780    use crate::parser::Config;
781
782    // ── Fixture builder ─────────────────────────────────────────────────────
783    //
784    // A real on-disk store in a tempdir. We write actual files (frontmatter +
785    // wiki-links) and exercise the real code paths. The fixture constructs the
786    // `Store` by its public fields rather than `Store::open`, so the graph
787    // tests stand on their own and do not depend on any other module's
788    // behavior. Each test asserts the behavior the SPEC promises, derived from
789    // intent, never from echoing the function's own output.
790    //
791    // `backlinks` (and `neighborhood` in any incoming direction) enumerate their
792    // candidate set from the type-folder `index.jsonl` sidecars — the loop
793    // contract: never a whole-store content walk. A real db.md store maintains
794    // those sidecars write-through, so a test that exercises backlinks must call
795    // [`Fixture::reindex`] after writing its files to build them (the SWEEP that
796    // `dbmd index rebuild` runs). Forwardlinks/orphans read content directly and
797    // need no sidecar.
798
799    struct Fixture {
800        _tmp: TempDir,
801        store: Store,
802    }
803
804    impl Fixture {
805        fn new() -> Self {
806            let tmp = TempDir::new().expect("tempdir");
807            let root = tmp.path().to_path_buf();
808            fs::write(root.join("DB.md"), "---\ntype: db-md\n---\n# store\n").expect("DB.md");
809            let store = Store {
810                root,
811                config: Config::default(),
812            };
813            Fixture { _tmp: tmp, store }
814        }
815
816        /// Write a content file at a store-relative path with the given type,
817        /// summary, and body. Creates parent dirs.
818        fn write(&self, rel: &str, type_: &str, summary: &str, body: &str) {
819            let abs = self.store.root.join(rel);
820            fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
821            let contents = format!(
822                "---\ntype: {type_}\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: {summary}\n---\n{body}\n"
823            );
824            fs::write(&abs, contents).expect("write file");
825        }
826
827        /// Write a raw file verbatim (for frontmatter-shape edge cases).
828        fn write_raw(&self, rel: &str, contents: &str) {
829            let abs = self.store.root.join(rel);
830            fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
831            fs::write(&abs, contents).expect("write raw");
832        }
833
834        /// Build the type-folder `index.jsonl` sidecars from the content written
835        /// so far — the state a real store is always in (write-through), and the
836        /// candidate set `backlinks` reads. Call after writing files in any test
837        /// that exercises `backlinks` or an incoming-direction `neighborhood`.
838        fn reindex(&self) {
839            crate::index::Index::rebuild_all(&self.store).expect("rebuild sidecars");
840        }
841
842        fn p(&self, rel: &str) -> PathBuf {
843            PathBuf::from(rel)
844        }
845    }
846
847    fn paths(v: &[PathBuf]) -> Vec<String> {
848        v.iter()
849            .map(|p| p.to_string_lossy().replace('\\', "/"))
850            .collect()
851    }
852
853    // ── normalize_target ────────────────────────────────────────────────────
854
855    #[test]
856    fn normalize_strips_md_and_leading_dotslash() {
857        assert_eq!(
858            normalize_target(Path::new("records/contacts/sarah.md")),
859            "records/contacts/sarah"
860        );
861        assert_eq!(
862            normalize_target(Path::new("./wiki/people/elena")),
863            "wiki/people/elena"
864        );
865        assert_eq!(normalize_target(Path::new("/records/x")), "records/x");
866        // Bare and `.md` forms must collapse to the same key, or edges won't unify.
867        assert_eq!(
868            normalize_target(Path::new("a/b")),
869            normalize_target(Path::new("a/b.md"))
870        );
871    }
872
873    // ── extract_link_targets (forwardlinks core) ────────────────────────────
874
875    #[test]
876    fn extract_handles_display_text_and_md_suffix() {
877        let body = "See [[wiki/people/sarah-chen|Sarah]] and [[records/contacts/elena.md]].";
878        let got = extract_link_targets(body);
879        assert_eq!(
880            got,
881            vec!["wiki/people/sarah-chen", "records/contacts/elena"]
882        );
883    }
884
885    #[test]
886    fn extract_ignores_external_markdown_links() {
887        // Standard markdown links are NOT wiki-links and must not be extracted
888        // (SPEC: external refs don't participate in the graph).
889        let body = "[Acme](https://acme.io) but [[records/companies/acme]] is internal.";
890        let got = extract_link_targets(body);
891        assert_eq!(got, vec!["records/companies/acme"]);
892    }
893
894    #[test]
895    fn extract_display_text_is_not_treated_as_a_target() {
896        // A `|display` segment that looks path-like must not become a target;
897        // only the part before `|` is the link target.
898        let body = "[[records/contacts/sarah|sources/emails/decoy]]";
899        let got = extract_link_targets(body);
900        assert_eq!(got, vec!["records/contacts/sarah"]);
901    }
902
903    // ── rewrite_links_to (write-side twin of backlinks) ─────────────────────
904
905    #[test]
906    fn rewrite_plain_link_to_canonical_new_target() {
907        let got = rewrite_links_to(
908            "See [[records/contacts/sarah-chen]] today.",
909            Path::new("records/contacts/sarah-chen"),
910            Path::new("records/contacts/sarah-chen-acme"),
911        );
912        assert_eq!(got, "See [[records/contacts/sarah-chen-acme]] today.");
913    }
914
915    #[test]
916    fn rewrite_preserves_display_override() {
917        let got = rewrite_links_to(
918            "With [[records/contacts/sarah-chen|Sarah]].",
919            Path::new("records/contacts/sarah-chen"),
920            Path::new("records/contacts/sarah-chen-acme"),
921        );
922        assert_eq!(got, "With [[records/contacts/sarah-chen-acme|Sarah]].");
923    }
924
925    #[test]
926    fn rewrite_matches_md_suffixed_old_and_emits_bare_new() {
927        // The `.md` spelling of the old target must match (it normalizes to the
928        // same key the read side uses), and the new target is emitted bare —
929        // the writer doctrine validate enforces (`WIKI_LINK_HAS_EXTENSION`).
930        let got = rewrite_links_to(
931            "[[records/contacts/sarah-chen.md]]",
932            Path::new("records/contacts/sarah-chen"),
933            Path::new("records/contacts/new.md"),
934        );
935        assert_eq!(got, "[[records/contacts/new]]");
936    }
937
938    #[test]
939    fn rewrite_leaves_prefix_collisions_and_short_form_untouched() {
940        // Boundary correctness, anchored to the SAME normalize_target the read
941        // side keys on: `records/contacts/sarah-chen` must NOT match the longer
942        // `[[…-jr]]`, the short-form `[[sarah-chen]]`, or an unrelated target.
943        let input = "[[records/contacts/sarah-chen-jr]] [[sarah-chen]] [[wiki/topics/x]]";
944        let got = rewrite_links_to(
945            input,
946            Path::new("records/contacts/sarah-chen"),
947            Path::new("records/contacts/new"),
948        );
949        assert_eq!(got, input, "no genuine edge to the seed → text unchanged");
950    }
951
952    #[test]
953    fn rewrite_handles_multiple_occurrences_and_mixed_spellings() {
954        let got = rewrite_links_to(
955            "[[records/x]] then [[./records/x]] and [[records/x.md|d]] end",
956            Path::new("records/x"),
957            Path::new("records/y"),
958        );
959        // All three spellings of the same target retarget; the display survives.
960        assert_eq!(
961            got,
962            "[[records/y]] then [[records/y]] and [[records/y|d]] end"
963        );
964    }
965
966    #[test]
967    fn rewrite_retargets_exactly_the_edges_the_core_parser_sees() {
968        // The load-bearing property of moving the rewrite into core: the write
969        // side must operate on EXACTLY the edge set the read side recognizes —
970        // the same `extract_link_targets` / `normalize_target` grammar that
971        // `forwardlinks` is built on. Anchor the test to that grammar (via
972        // `forwardlinks` on a real file) rather than re-listing literals, so a
973        // future divergence between the read parser and the write rewrite fails
974        // here. (Coupled to `forwardlinks` — the single-file edge extractor —
975        // not the multi-file `backlinks` traversal, so it tests the grammar, not
976        // the walk.)
977        let fx = Fixture::new();
978        let body = "Met [[records/contacts/sarah.md|Sarah]] and not [[records/contacts/sarah-2]].";
979        fx.write("wiki/people/bio.md", "wiki-page", "bio", body);
980
981        // Read side: the parser sees two outgoing edges, both in canonical bare
982        // form (the `.md` spelling collapsed). `sarah` is a real edge here.
983        let edges = forwardlinks(&fx.store, &fx.p("wiki/people/bio.md")).unwrap();
984        assert_eq!(
985            paths(&edges),
986            vec!["records/contacts/sarah", "records/contacts/sarah-2"],
987            "fixture must contain exactly the two edges this test reasons about"
988        );
989
990        // Write side: rewriting `sarah → sarah-chen` must retarget the edge the
991        // parser recognized (matching the `.md` spelling), preserve the display,
992        // and leave the unrelated `sarah-2` edge untouched.
993        let got = rewrite_links_to(
994            body,
995            Path::new("records/contacts/sarah"),
996            Path::new("records/contacts/sarah-chen"),
997        );
998        assert_eq!(
999            got,
1000            "Met [[records/contacts/sarah-chen|Sarah]] and not [[records/contacts/sarah-2]]."
1001        );
1002
1003        // Cross-check through the parser: the rewritten text's edge set is the
1004        // original with `sarah` swapped for `sarah-chen` — proving the rewrite
1005        // moved exactly one edge, the one the read side keyed on.
1006        fx.write("wiki/people/bio.md", "wiki-page", "bio", &got);
1007        let after = forwardlinks(&fx.store, &fx.p("wiki/people/bio.md")).unwrap();
1008        assert_eq!(
1009            paths(&after),
1010            vec!["records/contacts/sarah-2", "records/contacts/sarah-chen"],
1011            "after rewrite the parser must see the new target and not the old"
1012        );
1013    }
1014
1015    #[test]
1016    fn rewrite_empty_old_target_is_a_no_op() {
1017        // A degenerate `old` (normalizes to empty) must never rewrite anything,
1018        // mirroring backlinks' empty-target guard.
1019        let input = "[[records/x]] [[]] text";
1020        let got = rewrite_links_to(input, Path::new(""), Path::new("records/y"));
1021        assert_eq!(got, input);
1022    }
1023
1024    #[test]
1025    fn rewrite_no_match_returns_input_unchanged() {
1026        let input = "no links, [external](https://x), and [[wiki/topics/y]]";
1027        let got = rewrite_links_to(input, Path::new("records/x"), Path::new("records/z"));
1028        assert_eq!(got, input);
1029    }
1030
1031    // ── forwardlinks ─────────────────────────────────────────────────────────
1032
1033    #[test]
1034    fn forwardlinks_returns_sorted_deduped_targets_excluding_self() {
1035        let fx = Fixture::new();
1036        fx.write(
1037            "wiki/projects/renewal.md",
1038            "wiki-page",
1039            "Renewal project",
1040            "Links: [[records/contacts/sarah]] [[records/companies/acme]] [[records/contacts/sarah]] and itself [[wiki/projects/renewal]].",
1041        );
1042        // The targets need not exist on disk for forwardlinks (it reads the one
1043        // file only). Self-links are dropped; duplicates collapse; sorted asc.
1044        let got = forwardlinks(&fx.store, &fx.p("wiki/projects/renewal.md")).unwrap();
1045        assert_eq!(
1046            paths(&got),
1047            vec!["records/companies/acme", "records/contacts/sarah"]
1048        );
1049    }
1050
1051    #[test]
1052    fn forwardlinks_picks_up_wiki_links_in_frontmatter() {
1053        // SPEC: wiki-links appear in scalar + block-sequence frontmatter fields,
1054        // not just the body. forwardlinks must follow those edges too.
1055        let fx = Fixture::new();
1056        fx.write_raw(
1057            "records/meetings/m1.md",
1058            "---\ntype: meeting\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Renewal sync\ncompany: [[records/companies/acme]]\nattendees:\n  - [[records/contacts/sarah]]\n  - [[records/contacts/elena]]\n---\nNotes about [[wiki/projects/renewal]].\n",
1059        );
1060        let got = forwardlinks(&fx.store, &fx.p("records/meetings/m1.md")).unwrap();
1061        assert_eq!(
1062            paths(&got),
1063            vec![
1064                "records/companies/acme",
1065                "records/contacts/elena",
1066                "records/contacts/sarah",
1067                "wiki/projects/renewal",
1068            ]
1069        );
1070    }
1071
1072    #[test]
1073    fn forwardlinks_missing_file_is_empty_not_error() {
1074        let fx = Fixture::new();
1075        let got = forwardlinks(&fx.store, &fx.p("wiki/people/ghost.md")).unwrap();
1076        assert!(got.is_empty());
1077    }
1078
1079    #[test]
1080    fn forwardlinks_resolves_seed_given_without_md_extension() {
1081        let fx = Fixture::new();
1082        fx.write(
1083            "wiki/people/sarah.md",
1084            "wiki-page",
1085            "Sarah bio",
1086            "Works at [[records/companies/acme]].",
1087        );
1088        // Seed passed in bare wiki-link form (no `.md`) must still resolve.
1089        let got = forwardlinks(&fx.store, &fx.p("wiki/people/sarah")).unwrap();
1090        assert_eq!(paths(&got), vec!["records/companies/acme"]);
1091    }
1092
1093    // ── backlinks ──────────────────────────────────────────────────────────
1094
1095    #[test]
1096    fn backlinks_finds_incoming_across_layers_and_link_forms() {
1097        let fx = Fixture::new();
1098        // Target.
1099        fx.write("records/contacts/sarah.md", "contact", "Sarah Chen", "");
1100        // Three different incoming-link spellings, all to the same target.
1101        fx.write(
1102            "wiki/people/sarah.md",
1103            "wiki-page",
1104            "bio",
1105            "See [[records/contacts/sarah]].",
1106        );
1107        fx.write(
1108            "records/meetings/m1.md",
1109            "meeting",
1110            "Renewal call",
1111            "Attendee [[records/contacts/sarah|Sarah]].",
1112        );
1113        fx.write(
1114            "sources/emails/e1.md",
1115            "email",
1116            "Hi",
1117            "From [[records/contacts/sarah.md]] today.",
1118        );
1119        // A file that links to a DIFFERENT contact must not be a backlink.
1120        fx.write(
1121            "wiki/people/other.md",
1122            "wiki-page",
1123            "x",
1124            "[[records/contacts/sarah-2]]",
1125        );
1126        fx.reindex();
1127
1128        // All three link forms ([[x]], [[x|d]], [[x.md]]) resolve to the same
1129        // target and are found; the linkers are returned in canonical bare form.
1130        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1131        assert_eq!(
1132            paths(&got),
1133            vec![
1134                "records/meetings/m1",
1135                "sources/emails/e1",
1136                "wiki/people/sarah",
1137            ]
1138        );
1139    }
1140
1141    #[test]
1142    fn backlinks_and_forwardlinks_round_trip_on_same_key() {
1143        // If A forwardlinks to B, then B backlinks to A — both expressed in the
1144        // identical bare key, so neighborhood can dedup across directions.
1145        let fx = Fixture::new();
1146        fx.write(
1147            "wiki/people/a.md",
1148            "wiki-page",
1149            "A",
1150            "Knows [[wiki/people/b]].",
1151        );
1152        fx.write("wiki/people/b.md", "wiki-page", "B", "");
1153        fx.reindex();
1154        let fwd = forwardlinks(&fx.store, &fx.p("wiki/people/a.md")).unwrap();
1155        let back = backlinks(&fx.store, &fx.p("wiki/people/b.md")).unwrap();
1156        assert_eq!(paths(&fwd), vec!["wiki/people/b"]);
1157        assert_eq!(paths(&back), vec!["wiki/people/a"]);
1158    }
1159
1160    #[test]
1161    fn backlinks_does_not_match_path_prefix_collisions() {
1162        let fx = Fixture::new();
1163        fx.write("records/contacts/sam.md", "contact", "Sam", "");
1164        // `sam-smith` shares the `sam` prefix; must NOT count as a backlink to `sam`.
1165        fx.write(
1166            "wiki/people/x.md",
1167            "wiki-page",
1168            "x",
1169            "[[records/contacts/sam-smith]]",
1170        );
1171        // The genuine backlink.
1172        fx.write(
1173            "wiki/people/y.md",
1174            "wiki-page",
1175            "y",
1176            "[[records/contacts/sam]]",
1177        );
1178        fx.reindex();
1179
1180        let got = backlinks(&fx.store, &fx.p("records/contacts/sam")).unwrap();
1181        assert_eq!(paths(&got), vec!["wiki/people/y"]);
1182    }
1183
1184    #[test]
1185    fn backlinks_excludes_self_reference() {
1186        let fx = Fixture::new();
1187        // A page that links to itself is not its own backlink.
1188        fx.write(
1189            "wiki/synthesis/overview.md",
1190            "wiki-page",
1191            "Overview",
1192            "This page [[wiki/synthesis/overview]] references itself.",
1193        );
1194        fx.reindex();
1195        let got = backlinks(&fx.store, &fx.p("wiki/synthesis/overview.md")).unwrap();
1196        assert!(
1197            got.is_empty(),
1198            "self-link must not appear as a backlink, got {got:?}"
1199        );
1200    }
1201
1202    #[test]
1203    fn backlinks_empty_when_nobody_links() {
1204        let fx = Fixture::new();
1205        fx.write("records/contacts/lonely.md", "contact", "Lonely", "");
1206        fx.write(
1207            "wiki/people/unrelated.md",
1208            "wiki-page",
1209            "x",
1210            "[[records/companies/acme]]",
1211        );
1212        fx.reindex();
1213        let got = backlinks(&fx.store, &fx.p("records/contacts/lonely.md")).unwrap();
1214        assert!(got.is_empty());
1215    }
1216
1217    #[test]
1218    fn backlinks_ignores_index_and_meta_files() {
1219        let fx = Fixture::new();
1220        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1221        // An index.md that lists the target must NOT be reported as a backlink
1222        // (indexes are catalog, not relationship edges).
1223        fx.write_raw(
1224            "records/contacts/index.md",
1225            "---\ntype: index\nscope: folder\nfolder: records/contacts\n---\n- [[records/contacts/sarah]] — Sarah\n",
1226        );
1227        fx.reindex();
1228        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1229        assert!(got.is_empty(), "index.md must be excluded, got {got:?}");
1230    }
1231
1232    #[test]
1233    fn backlinks_finds_body_only_edge_not_in_frontmatter_links_field() {
1234        // REGRESSION: the sidecar's `links` field carries only the file's
1235        // frontmatter `links:` list; it does NOT include wiki-links written in
1236        // the body or in other typed frontmatter fields. Answering backlinks
1237        // from `links[]` alone would silently miss this edge. The candidate set
1238        // is sidecar-bounded, but each candidate's edge is confirmed by parsing
1239        // the file (the same extraction forwardlinks uses), so a body-only link
1240        // must still register as a backlink.
1241        let fx = Fixture::new();
1242        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1243        // `meeting.md` links to sarah ONLY in its body — its frontmatter has no
1244        // `links:` field at all, so the sidecar record's `links` is empty.
1245        fx.write(
1246            "records/meetings/standup.md",
1247            "meeting",
1248            "Standup",
1249            "Discussed renewal with [[records/contacts/sarah]].",
1250        );
1251        fx.reindex();
1252
1253        // Guard the premise: the sidecar record really does carry an empty
1254        // `links` (so this test fails loudly if the index ever starts extracting
1255        // body links — at which point the backlink predicate could be revisited).
1256        let rec = fx
1257            .store
1258            .find_by_type("meeting")
1259            .unwrap()
1260            .into_iter()
1261            .find(|r| r.path == fx.p("records/meetings/standup.md"))
1262            .expect("meeting is catalogued in its sidecar");
1263        assert!(
1264            rec.links.is_empty(),
1265            "premise: the body link is NOT projected into the sidecar `links` field; got {:?}",
1266            rec.links
1267        );
1268
1269        // Yet backlinks still finds it — because it confirms via the file parse,
1270        // not via the sidecar `links` field.
1271        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1272        assert_eq!(
1273            paths(&got),
1274            vec!["records/meetings/standup"],
1275            "a body-only wiki-link must register as a backlink"
1276        );
1277    }
1278
1279    #[test]
1280    fn backlinks_finds_edge_in_typed_frontmatter_field() {
1281        // A wiki-link inside a *typed* frontmatter field (`company:`) is a real
1282        // edge forwardlinks follows, so backlinks must find it too — even though
1283        // the sidecar's `links` field (the `links:` key only) does not list it.
1284        let fx = Fixture::new();
1285        fx.write("records/companies/acme.md", "company", "Acme", "");
1286        fx.write_raw(
1287            "records/contacts/sarah.md",
1288            "---\ntype: contact\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Sarah\ncompany: [[records/companies/acme]]\n---\nBody with no links.\n",
1289        );
1290        fx.reindex();
1291        let got = backlinks(&fx.store, &fx.p("records/companies/acme.md")).unwrap();
1292        assert_eq!(
1293            paths(&got),
1294            vec!["records/contacts/sarah"],
1295            "a wiki-link in a typed frontmatter field is an incoming edge"
1296        );
1297    }
1298
1299    #[test]
1300    fn backlinks_unscoped_scans_the_tree_not_only_the_sidecar() {
1301        // REGRESSION (loop budget): an UNSCOPED `backlinks` must resolve incoming
1302        // edges with a SINGLE embedded-ripgrep pass over the tree
1303        // (`Store::find_links_to`), NOT by reading the sidecar candidate set and
1304        // then `read_to_string`-confirming each candidate (which re-opens every
1305        // content file → O(store); the documented >3x budget miss). A ripgrep
1306        // pass is the same scan engine `validate`/`rename`/`dbmd links` ride, and
1307        // the tree — not the sidecar — is its ground truth: a linker that is on
1308        // disk but absent from every sidecar (stale / never-built index) is still
1309        // found. We assert that behaviorally, which fails loudly if the unscoped
1310        // path ever reverts to the sidecar-bounded per-candidate confirm loop
1311        // (that loop would NOT find the unindexed linker).
1312        let fx = Fixture::new();
1313        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1314        fx.write(
1315            "wiki/people/indexed.md",
1316            "wiki-page",
1317            "Indexed",
1318            "[[records/contacts/sarah]]",
1319        );
1320        fx.reindex(); // builds sidecars for sarah + the indexed linker
1321
1322        // Now drop a NEW linker on disk WITHOUT reindexing — it is on disk but in
1323        // no sidecar.
1324        fx.write(
1325            "wiki/people/unindexed.md",
1326            "wiki-page",
1327            "Unindexed",
1328            "[[records/contacts/sarah]]",
1329        );
1330
1331        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1332        assert_eq!(
1333            paths(&got),
1334            vec!["wiki/people/indexed", "wiki/people/unindexed"],
1335            "unscoped backlinks ripgrep-scans the tree, so the on-disk-but-unindexed \
1336             linker is found too — not only the sidecar-catalogued one"
1337        );
1338    }
1339
1340    #[test]
1341    fn backlinks_scoped_candidates_come_from_the_sidecar_not_a_tree_walk() {
1342        // REGRESSION (scale contract): the SCOPED form (`--type` / `--in`) is the
1343        // I/O-scoped path — it enumerates candidates from the relevant type-folder
1344        // `index.jsonl` sidecars and parses only those, NOT a whole-tree walk.
1345        // That is what makes the scope an I/O scope, not just a result filter:
1346        // a linker that is on disk but ABSENT from the sidecar (stale / never-built
1347        // index) is NOT discovered by the scoped call (the sidecar bounds which
1348        // files are candidates). This is the loop-vs-walk distinction the SPEC
1349        // draws, and it is exactly the inverse of the unscoped tree scan above.
1350        let fx = Fixture::new();
1351        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1352        fx.write(
1353            "wiki/people/indexed.md",
1354            "wiki-page",
1355            "Indexed",
1356            "[[records/contacts/sarah]]",
1357        );
1358        fx.reindex(); // builds sidecars for sarah + the indexed linker
1359
1360        // Drop a NEW wiki-page linker on disk WITHOUT reindexing — on disk, in no
1361        // sidecar.
1362        fx.write(
1363            "wiki/people/unindexed.md",
1364            "wiki-page",
1365            "Unindexed",
1366            "[[records/contacts/sarah]]",
1367        );
1368
1369        // Scoped to the `wiki-page` type: the candidate set is the sidecar's, so
1370        // only the catalogued linker is found — the unindexed one is invisible.
1371        let only_wiki_pages = vec!["wiki-page".to_string()];
1372        let got = backlinks_filtered(
1373            &fx.store,
1374            &fx.p("records/contacts/sarah.md"),
1375            &only_wiki_pages,
1376            None,
1377        )
1378        .unwrap();
1379        assert_eq!(
1380            paths(&got),
1381            vec!["wiki/people/indexed"],
1382            "scoped backlinks reads the sidecar candidate set; the on-disk-but-unindexed \
1383             linker is not tree-walked"
1384        );
1385    }
1386
1387    #[test]
1388    fn backlinks_filtered_type_scopes_the_candidate_set() {
1389        // `--type` narrows backlinks to linkers of that type. Two files link to
1390        // the target — one `meeting`, one `wiki-page`; filtering to `meeting`
1391        // returns only the meeting.
1392        let fx = Fixture::new();
1393        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1394        fx.write(
1395            "records/meetings/m1.md",
1396            "meeting",
1397            "Call",
1398            "[[records/contacts/sarah]]",
1399        );
1400        fx.write(
1401            "wiki/people/bio.md",
1402            "wiki-page",
1403            "Bio",
1404            "[[records/contacts/sarah]]",
1405        );
1406        fx.reindex();
1407
1408        let only_meetings = vec!["meeting".to_string()];
1409        let got = backlinks_filtered(
1410            &fx.store,
1411            &fx.p("records/contacts/sarah.md"),
1412            &only_meetings,
1413            None,
1414        )
1415        .unwrap();
1416        assert_eq!(
1417            paths(&got),
1418            vec!["records/meetings/m1"],
1419            "--type meeting must exclude the wiki-page linker"
1420        );
1421
1422        // Unfiltered, both come back — proving the filter (not the data) dropped one.
1423        let all = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1424        assert_eq!(paths(&all), vec!["records/meetings/m1", "wiki/people/bio"]);
1425    }
1426
1427    #[test]
1428    fn backlinks_filtered_layer_scopes_the_candidate_set() {
1429        // `--in <layer>` narrows backlinks to linkers under that layer.
1430        let fx = Fixture::new();
1431        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1432        fx.write(
1433            "records/meetings/m1.md",
1434            "meeting",
1435            "Call",
1436            "[[records/contacts/sarah]]",
1437        );
1438        fx.write(
1439            "wiki/people/bio.md",
1440            "wiki-page",
1441            "Bio",
1442            "[[records/contacts/sarah]]",
1443        );
1444        fx.reindex();
1445
1446        let got = backlinks_filtered(
1447            &fx.store,
1448            &fx.p("records/contacts/sarah.md"),
1449            &[],
1450            Some(Layer::Wiki),
1451        )
1452        .unwrap();
1453        assert_eq!(
1454            paths(&got),
1455            vec!["wiki/people/bio"],
1456            "--in wiki must keep only the wiki-layer linker"
1457        );
1458
1459        let records_only = backlinks_filtered(
1460            &fx.store,
1461            &fx.p("records/contacts/sarah.md"),
1462            &[],
1463            Some(Layer::Records),
1464        )
1465        .unwrap();
1466        assert_eq!(paths(&records_only), vec!["records/meetings/m1"]);
1467    }
1468
1469    #[test]
1470    fn backlinks_scoped_type_spans_all_topic_folders_in_its_layer() {
1471        // REGRESSION (finding #12): a `type` can legitimately span several folders
1472        // within one layer — `wiki-page` is filed under `wiki/<topic>/` for an
1473        // arbitrary topic (SPEC). The scoped candidate set must read the whole
1474        // `wiki/` layer and filter by type, NOT just the canonical-guess folder
1475        // `wiki/topics/`. Before the fix, `find_by_type("wiki-page")` read ONLY
1476        // `wiki/topics/index.jsonl` whenever that sidecar existed, silently
1477        // dropping every wiki-page linker filed under any other topic folder — so
1478        // `backlinks --type wiki-page` under-reported dependents (a wrong
1479        // blast-radius check) the moment a `wiki/topics/` page also existed.
1480        //
1481        // The trigger needs BOTH: a populated `wiki/topics/` (so its canonical
1482        // sidecar exists) AND a wiki-page elsewhere in the layer that links the
1483        // target. The earlier
1484        // `backlinks_scoped_candidates_come_from_the_sidecar_not_a_tree_walk` test
1485        // masks this bug precisely because its fixture has no `wiki/topics/`.
1486        let fx = Fixture::new();
1487        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1488        // A wiki-page in the CANONICAL topic folder, NOT linking the target — its
1489        // only purpose is to make `wiki/topics/index.jsonl` exist on disk.
1490        fx.write(
1491            "wiki/topics/glossary.md",
1492            "wiki-page",
1493            "Glossary",
1494            "No link to sarah here.",
1495        );
1496        // A wiki-page in a NON-canonical topic folder that DOES link the target.
1497        fx.write(
1498            "wiki/people/sarah.md",
1499            "wiki-page",
1500            "Sarah bio",
1501            "Profile of [[records/contacts/sarah]].",
1502        );
1503        fx.reindex(); // builds wiki/topics/index.jsonl AND wiki/people/index.jsonl
1504
1505        // Scoped to `wiki-page`: the off-canonical linker MUST be found. Pre-fix,
1506        // the candidate set was only `wiki/topics/`'s sidecar, so this was empty.
1507        let scoped = backlinks_filtered(
1508            &fx.store,
1509            &fx.p("records/contacts/sarah.md"),
1510            &["wiki-page".to_string()],
1511            None,
1512        )
1513        .unwrap();
1514        assert_eq!(
1515            paths(&scoped),
1516            vec!["wiki/people/sarah"],
1517            "a wiki-page filed outside wiki/topics/ must still be a scoped backlink"
1518        );
1519
1520        // Cross-check: the unscoped path (ripgrep tree scan) finds the same single
1521        // linker, proving the scoped result is now complete — not over- or
1522        // under-counting — and that the data was real all along.
1523        let unscoped = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1524        assert_eq!(
1525            paths(&unscoped),
1526            vec!["wiki/people/sarah"],
1527            "scoped and unscoped backlinks must agree on the edge set"
1528        );
1529    }
1530
1531    // ── neighborhood ─────────────────────────────────────────────────────────
1532
1533    #[test]
1534    fn neighborhood_hops_zero_is_empty() {
1535        let fx = Fixture::new();
1536        fx.write("wiki/people/a.md", "wiki-page", "A", "[[wiki/people/b]]");
1537        fx.write("wiki/people/b.md", "wiki-page", "B", "");
1538        let slice = neighborhood(
1539            &fx.store,
1540            &fx.p("wiki/people/a.md"),
1541            0,
1542            &[],
1543            Direction::Both,
1544        )
1545        .unwrap();
1546        assert_eq!(slice.seed, fx.p("wiki/people/a"));
1547        assert!(slice.nodes.is_empty());
1548    }
1549
1550    #[test]
1551    fn neighborhood_outgoing_one_hop_reads_summary_and_type() {
1552        let fx = Fixture::new();
1553        fx.write(
1554            "wiki/people/a.md",
1555            "wiki-page",
1556            "Person A",
1557            "Knows [[records/contacts/b]].",
1558        );
1559        fx.write("records/contacts/b.md", "contact", "Contact B summary", "");
1560        let slice = neighborhood(
1561            &fx.store,
1562            &fx.p("wiki/people/a.md"),
1563            1,
1564            &[],
1565            Direction::Outgoing,
1566        )
1567        .unwrap();
1568        assert_eq!(slice.nodes.len(), 1);
1569        let n = &slice.nodes[0];
1570        assert_eq!(n.path, fx.p("records/contacts/b"));
1571        assert_eq!(n.summary, "Contact B summary");
1572        assert_eq!(n.type_.as_deref(), Some("contact"));
1573        assert_eq!(n.hops, 1);
1574        assert_eq!(n.via, Some((fx.p("wiki/people/a"), Direction::Outgoing)));
1575    }
1576
1577    #[test]
1578    fn neighborhood_incoming_only_walks_backlinks() {
1579        let fx = Fixture::new();
1580        // a -> seed (incoming to seed). seed -> c (outgoing from seed).
1581        fx.write(
1582            "wiki/people/seed.md",
1583            "wiki-page",
1584            "Seed",
1585            "Out to [[wiki/people/c]].",
1586        );
1587        fx.write(
1588            "wiki/people/a.md",
1589            "wiki-page",
1590            "A",
1591            "In to [[wiki/people/seed]].",
1592        );
1593        fx.write("wiki/people/c.md", "wiki-page", "C", "");
1594        fx.reindex();
1595        let slice = neighborhood(
1596            &fx.store,
1597            &fx.p("wiki/people/seed.md"),
1598            1,
1599            &[],
1600            Direction::Incoming,
1601        )
1602        .unwrap();
1603        // Incoming direction: only `a` (which links TO seed), not `c`.
1604        assert_eq!(
1605            paths(
1606                &slice
1607                    .nodes
1608                    .iter()
1609                    .map(|n| n.path.clone())
1610                    .collect::<Vec<_>>()
1611            ),
1612            vec!["wiki/people/a"]
1613        );
1614        assert_eq!(
1615            slice.nodes[0].via,
1616            Some((fx.p("wiki/people/seed"), Direction::Incoming))
1617        );
1618    }
1619
1620    #[test]
1621    fn neighborhood_bounded_bfs_respects_hop_limit_and_min_distance() {
1622        let fx = Fixture::new();
1623        // Chain a -> b -> c -> d, all outgoing.
1624        fx.write("wiki/c/a.md", "wiki-page", "A", "[[wiki/c/b]]");
1625        fx.write("wiki/c/b.md", "wiki-page", "B", "[[wiki/c/c]]");
1626        fx.write("wiki/c/c.md", "wiki-page", "C", "[[wiki/c/d]]");
1627        fx.write("wiki/c/d.md", "wiki-page", "D", "");
1628        let slice =
1629            neighborhood(&fx.store, &fx.p("wiki/c/a.md"), 2, &[], Direction::Outgoing).unwrap();
1630        // 2 hops reaches b (1) and c (2), not d (3).
1631        let by_path: HashMap<String, u32> = slice
1632            .nodes
1633            .iter()
1634            .map(|n| (n.path.to_string_lossy().to_string(), n.hops))
1635            .collect();
1636        assert_eq!(by_path.get("wiki/c/b").copied(), Some(1));
1637        assert_eq!(by_path.get("wiki/c/c").copied(), Some(2));
1638        assert_eq!(by_path.get("wiki/c/d"), None);
1639        assert_eq!(slice.nodes.len(), 2);
1640    }
1641
1642    #[test]
1643    fn neighborhood_records_min_hops_on_diamond() {
1644        let fx = Fixture::new();
1645        // Diamond: a -> b, a -> c, b -> d, c -> d. d is reachable at hop 2 from
1646        // either branch; it must be recorded once, at hop 2.
1647        fx.write("wiki/d/a.md", "wiki-page", "A", "[[wiki/d/b]] [[wiki/d/c]]");
1648        fx.write("wiki/d/b.md", "wiki-page", "B", "[[wiki/d/d]]");
1649        fx.write("wiki/d/c.md", "wiki-page", "C", "[[wiki/d/d]]");
1650        fx.write("wiki/d/d.md", "wiki-page", "D", "");
1651        let slice =
1652            neighborhood(&fx.store, &fx.p("wiki/d/a.md"), 3, &[], Direction::Outgoing).unwrap();
1653        let d_nodes: Vec<&ContextNode> = slice
1654            .nodes
1655            .iter()
1656            .filter(|n| n.path == fx.p("wiki/d/d"))
1657            .collect();
1658        assert_eq!(d_nodes.len(), 1, "d must appear exactly once");
1659        assert_eq!(d_nodes[0].hops, 2, "d's min distance from a is 2");
1660        // b and c at hop 1, d at hop 2 => 3 nodes total, no cycle blowup.
1661        assert_eq!(slice.nodes.len(), 3);
1662    }
1663
1664    #[test]
1665    fn neighborhood_type_filter_narrows_results_but_not_traversal() {
1666        let fx = Fixture::new();
1667        // seed -> contact -> meeting. Filtering to `meeting` must still reach
1668        // the meeting THROUGH the (excluded) contact at hop 2.
1669        fx.write(
1670            "wiki/people/seed.md",
1671            "wiki-page",
1672            "Seed",
1673            "[[records/contacts/sarah]]",
1674        );
1675        fx.write(
1676            "records/contacts/sarah.md",
1677            "contact",
1678            "Sarah",
1679            "[[records/meetings/m1]]",
1680        );
1681        fx.write("records/meetings/m1.md", "meeting", "Renewal call", "");
1682        let only_meetings = vec!["meeting".to_string()];
1683        let slice = neighborhood(
1684            &fx.store,
1685            &fx.p("wiki/people/seed.md"),
1686            2,
1687            &only_meetings,
1688            Direction::Outgoing,
1689        )
1690        .unwrap();
1691        // Only the meeting is returned; the contact is traversed but filtered out.
1692        assert_eq!(slice.nodes.len(), 1);
1693        assert_eq!(slice.nodes[0].path, fx.p("records/meetings/m1"));
1694        assert_eq!(slice.nodes[0].type_.as_deref(), Some("meeting"));
1695        assert_eq!(slice.nodes[0].hops, 2);
1696    }
1697
1698    #[test]
1699    fn neighborhood_capped_bounds_traversal_not_just_output() {
1700        // REGRESSION (finding #16): `neighborhood` expands every reached node, and
1701        // each incoming-edge expansion is a full-store scan, so the per-node cost
1702        // is O(visited × store). The CLI's `--limit` was applied post-hoc as a
1703        // `.take(n)` on the RESULT, which caps printed nodes but NOT the traversal
1704        // — the scans still fire for every reachable node. `neighborhood_capped`
1705        // bounds the traversal itself: once `max_nodes` distinct nodes are
1706        // admitted, the BFS stops discovering (and therefore stops scanning).
1707        //
1708        // Structure proving traversal — not just output — is bounded:
1709        //   seed -> a, b, c   (hop 1, discovered in sorted order: a, b, c)
1710        //   a    -> deep      (hop 2, reachable ONLY by expanding `a`)
1711        // Cap at 2: admit `a` and `b`, stop before `c` and before any hop-2
1712        // expansion. `deep` is therefore unreachable. A post-hoc `.take(2)` would
1713        // have traversed the whole graph (reaching `deep`) and only then truncated
1714        // — so the absence of `deep` is observable proof the traversal stopped.
1715        let fx = Fixture::new();
1716        fx.write(
1717            "wiki/n/seed.md",
1718            "wiki-page",
1719            "Seed",
1720            "[[wiki/n/a]] [[wiki/n/b]] [[wiki/n/c]]",
1721        );
1722        fx.write("wiki/n/a.md", "wiki-page", "A", "[[wiki/n/deep]]");
1723        fx.write("wiki/n/b.md", "wiki-page", "B", "");
1724        fx.write("wiki/n/c.md", "wiki-page", "C", "");
1725        fx.write("wiki/n/deep.md", "wiki-page", "Deep", "");
1726
1727        // Uncapped over 3 hops: all four reachable nodes appear (a, b, c at hop 1,
1728        // deep at hop 2) — the full set the cap is measured against.
1729        let full = neighborhood(
1730            &fx.store,
1731            &fx.p("wiki/n/seed.md"),
1732            3,
1733            &[],
1734            Direction::Outgoing,
1735        )
1736        .unwrap();
1737        assert_eq!(
1738            paths(
1739                &full
1740                    .nodes
1741                    .iter()
1742                    .map(|n| n.path.clone())
1743                    .collect::<Vec<_>>()
1744            ),
1745            vec!["wiki/n/a", "wiki/n/b", "wiki/n/c", "wiki/n/deep"],
1746            "uncapped traversal reaches every node within the hop budget"
1747        );
1748
1749        // Capped at 2 over the SAME hop budget: exactly the first two hop-1 nodes,
1750        // and crucially NOT `deep` — the cap halted the BFS before any node was
1751        // expanded into hop 2, so the deep node was never traversed to.
1752        let capped = neighborhood_capped(
1753            &fx.store,
1754            &fx.p("wiki/n/seed.md"),
1755            3,
1756            &[],
1757            Direction::Outgoing,
1758            Some(2),
1759        )
1760        .unwrap();
1761        assert_eq!(
1762            paths(
1763                &capped
1764                    .nodes
1765                    .iter()
1766                    .map(|n| n.path.clone())
1767                    .collect::<Vec<_>>()
1768            ),
1769            vec!["wiki/n/a", "wiki/n/b"],
1770            "the cap bounds traversal: only the first 2 nodes are reached, and the \
1771             hop-2 `deep` node (reachable only by expanding a capped-out node) is \
1772             never traversed"
1773        );
1774
1775        // `max_nodes = None` is exactly the unbounded `neighborhood` behavior.
1776        let uncapped = neighborhood_capped(
1777            &fx.store,
1778            &fx.p("wiki/n/seed.md"),
1779            3,
1780            &[],
1781            Direction::Outgoing,
1782            None,
1783        )
1784        .unwrap();
1785        assert_eq!(
1786            uncapped.nodes.len(),
1787            full.nodes.len(),
1788            "None cap matches the unbounded neighborhood result"
1789        );
1790    }
1791
1792    #[test]
1793    fn neighborhood_capped_both_direction_caps_the_node_count() {
1794        // The CLI always passes `Direction::Both` (the per-node backlinks scan is
1795        // the expensive path the cap exists to bound). The cap gates discovery in
1796        // any direction, so a hub linked from many nodes is still bounded.
1797        let fx = Fixture::new();
1798        fx.write("wiki/h/hub.md", "wiki-page", "Hub", "");
1799        for n in ["a", "b", "c", "d", "e"] {
1800            fx.write(&format!("wiki/h/{n}.md"), "wiki-page", n, "[[wiki/h/hub]]");
1801        }
1802        fx.reindex();
1803
1804        let capped = neighborhood_capped(
1805            &fx.store,
1806            &fx.p("wiki/h/hub.md"),
1807            1,
1808            &[],
1809            Direction::Both,
1810            Some(3),
1811        )
1812        .unwrap();
1813        assert_eq!(
1814            capped.nodes.len(),
1815            3,
1816            "Both-direction neighborhood is bounded to the node cap"
1817        );
1818
1819        // Without the cap the same call returns all five backlinking nodes,
1820        // proving the cap (not the data) limited the set.
1821        let uncapped =
1822            neighborhood(&fx.store, &fx.p("wiki/h/hub.md"), 1, &[], Direction::Both).unwrap();
1823        assert_eq!(uncapped.nodes.len(), 5);
1824    }
1825
1826    #[test]
1827    fn neighborhood_cycle_terminates() {
1828        let fx = Fixture::new();
1829        // a <-> b cycle. Must not loop forever; each appears once.
1830        fx.write("wiki/g/a.md", "wiki-page", "A", "[[wiki/g/b]]");
1831        fx.write("wiki/g/b.md", "wiki-page", "B", "[[wiki/g/a]]");
1832        fx.reindex();
1833        let slice =
1834            neighborhood(&fx.store, &fx.p("wiki/g/a.md"), 10, &[], Direction::Both).unwrap();
1835        // From a: b is the only other node (a is the seed, excluded).
1836        assert_eq!(
1837            paths(
1838                &slice
1839                    .nodes
1840                    .iter()
1841                    .map(|n| n.path.clone())
1842                    .collect::<Vec<_>>()
1843            ),
1844            vec!["wiki/g/b"]
1845        );
1846    }
1847
1848    // ── orphans ──────────────────────────────────────────────────────────────
1849
1850    #[test]
1851    fn orphans_finds_files_with_no_edges_either_direction() {
1852        let fx = Fixture::new();
1853        // Wired pair: a links to b (a has outgoing, b has incoming).
1854        fx.write("wiki/people/a.md", "wiki-page", "A", "[[wiki/people/b]]");
1855        fx.write("wiki/people/b.md", "wiki-page", "B", "");
1856        // Orphan: no links in or out.
1857        fx.write(
1858            "sources/emails/lonely.md",
1859            "email",
1860            "Lonely email",
1861            "Just text, no links.",
1862        );
1863        let got = orphans(&fx.store, None).unwrap();
1864        assert_eq!(paths(&got), vec!["sources/emails/lonely.md"]);
1865    }
1866
1867    #[test]
1868    fn orphans_file_with_only_broken_outgoing_link_is_orphan() {
1869        let fx = Fixture::new();
1870        // Broken targets are validation issues, not graph edges to another
1871        // store file. A file whose only link points nowhere is still an orphan.
1872        fx.write(
1873            "wiki/people/a.md",
1874            "wiki-page",
1875            "A",
1876            "[[records/contacts/ghost]]",
1877        );
1878        let got = orphans(&fx.store, None).unwrap();
1879        assert!(
1880            paths(&got).contains(&"wiki/people/a.md".to_string()),
1881            "broken outgoing links must not wire the graph: {got:?}"
1882        );
1883    }
1884
1885    #[test]
1886    fn orphans_file_with_only_incoming_is_not_orphan() {
1887        let fx = Fixture::new();
1888        // `target` has no outgoing links but IS linked to by `linker` — not an orphan.
1889        fx.write("records/contacts/target.md", "contact", "Target", "");
1890        fx.write(
1891            "wiki/people/linker.md",
1892            "wiki-page",
1893            "Linker",
1894            "[[records/contacts/target]]",
1895        );
1896        let got = orphans(&fx.store, None).unwrap();
1897        assert!(
1898            !paths(&got).contains(&"records/contacts/target.md".to_string()),
1899            "incoming-only is not an orphan: {got:?}"
1900        );
1901        // `linker` has outgoing, so also not an orphan.
1902        assert!(!paths(&got).contains(&"wiki/people/linker.md".to_string()));
1903    }
1904
1905    #[test]
1906    fn orphans_incoming_link_from_other_layer_unorphans() {
1907        let fx = Fixture::new();
1908        // Candidate in records/, only incoming edge comes from wiki/ — a
1909        // cross-layer link must still un-orphan it even when scoped to records.
1910        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1911        fx.write(
1912            "wiki/people/sarah.md",
1913            "wiki-page",
1914            "bio",
1915            "[[records/contacts/sarah]]",
1916        );
1917        // A genuine orphan in records/ to prove the scope still returns something.
1918        fx.write("records/contacts/nemo.md", "contact", "Nemo", "");
1919        let got = orphans(&fx.store, Some(Layer::Records)).unwrap();
1920        assert_eq!(paths(&got), vec!["records/contacts/nemo.md"]);
1921    }
1922
1923    #[test]
1924    fn orphans_layer_scope_filters_candidates() {
1925        let fx = Fixture::new();
1926        // One orphan per layer.
1927        fx.write("sources/emails/s.md", "email", "S", "no links");
1928        fx.write("records/contacts/r.md", "contact", "R", "");
1929        fx.write("wiki/people/w.md", "wiki-page", "W", "");
1930        let only_wiki = orphans(&fx.store, Some(Layer::Wiki)).unwrap();
1931        assert_eq!(paths(&only_wiki), vec!["wiki/people/w.md"]);
1932        let only_sources = orphans(&fx.store, Some(Layer::Sources)).unwrap();
1933        assert_eq!(paths(&only_sources), vec!["sources/emails/s.md"]);
1934        // No scope: all three, sorted (records, sources, wiki).
1935        let all = orphans(&fx.store, None).unwrap();
1936        assert_eq!(
1937            paths(&all),
1938            vec![
1939                "records/contacts/r.md",
1940                "sources/emails/s.md",
1941                "wiki/people/w.md",
1942            ]
1943        );
1944    }
1945
1946    #[test]
1947    fn orphans_self_link_does_not_count_as_an_edge() {
1948        let fx = Fixture::new();
1949        // A page that only links to itself has no real edges => still an orphan.
1950        fx.write(
1951            "wiki/synthesis/solo.md",
1952            "wiki-page",
1953            "Solo",
1954            "I reference [[wiki/synthesis/solo]] only.",
1955        );
1956        let got = orphans(&fx.store, None).unwrap();
1957        assert_eq!(paths(&got), vec!["wiki/synthesis/solo.md"]);
1958    }
1959
1960    #[test]
1961    fn orphans_excludes_index_and_db_files() {
1962        let fx = Fixture::new();
1963        // A lone index.md / DB.md must never be reported as an orphan content file.
1964        fx.write_raw(
1965            "wiki/index.md",
1966            "---\ntype: index\nscope: layer\nfolder: wiki\n---\n# wiki\n",
1967        );
1968        fx.write(
1969            "wiki/people/real-orphan.md",
1970            "wiki-page",
1971            "Real",
1972            "no links",
1973        );
1974        let got = orphans(&fx.store, None).unwrap();
1975        assert_eq!(paths(&got), vec!["wiki/people/real-orphan.md"]);
1976    }
1977
1978    // ── frontmatter_block helper ─────────────────────────────────────────────
1979
1980    #[test]
1981    fn frontmatter_block_extracts_between_fences() {
1982        let text = "---\ntype: contact\nsummary: hi\n---\nbody here\n";
1983        assert_eq!(
1984            frontmatter_block(text),
1985            Some("type: contact\nsummary: hi\n")
1986        );
1987    }
1988
1989    #[test]
1990    fn frontmatter_block_none_without_leading_fence() {
1991        let text = "no frontmatter here\n";
1992        assert_eq!(frontmatter_block(text), None);
1993    }
1994
1995    #[test]
1996    fn frontmatter_block_tolerates_leading_bom() {
1997        // Regression (finding #19 cross-module): a UTF-8 BOM before the opening
1998        // fence must not hide the frontmatter from the graph layer — otherwise a
1999        // BOM-prefixed file the catalog indexes contributes no backlinks/edges.
2000        // Pre-fix the `---\n` strip failed on the BOM and returned None.
2001        let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody here\n";
2002        assert_eq!(
2003            frontmatter_block(text),
2004            Some("type: contact\nsummary: hi\n"),
2005            "a leading BOM must not hide frontmatter from the graph layer"
2006        );
2007    }
2008}