Skip to main content

dbmd_core/
graph.rs

1//! `graph` — the wiki-link **relationship layer**.
2//!
3//! Wiki-links are curated-relevance edges (the LLM wrote them), so the graph's
4//! job is to **assemble the relevant context around a seed**, not to be
5//! analyzed. **All ops are on-demand — there is no maintained graph** (a
6//! persistent graph is the roadmap engine).
7//!
8//! [`backlinks`] / [`forwardlinks`] are loop ops (O(changed), never O(store)).
9//! [`neighborhood`] is the high-value context-hydration op. [`orphans`] is a
10//! SWEEP curation worklist.
11//!
12//! Whole-graph analytics (connected components, cycle detection, shortest
13//! path, sinks/sources, DOT/JSON export) are deliberately **not** here — a
14//! human studying the graph opens the store in Obsidian; broken-link detection
15//! is [`crate::validate`]'s job (`WIKI_LINK_BROKEN`).
16//!
17//! ## Implementation note — two paths for the incoming-edge scan
18//!
19//! The scale contract (SPEC § Tooling, plan: *"the interactive loop is
20//! O(changed), never O(store)"*) is the load-bearing rule here. [`backlinks`]
21//! is a loop op, so it must **not** open and `read_to_string` every content file
22//! in the store on each call. It resolves incoming edges by one of two paths,
23//! chosen by whether the call is scoped:
24//!
25//! - **Unscoped** (`dbmd graph backlinks <x>`, no `--type`/`--in`): one
26//!   embedded-ripgrep pass for the literal `[[<target>]]` over the tree, via
27//!   [`Store::find_links_to`] (`grep` + `ignore`, early-exit per file) — the
28//!   same scan engine [`crate::validate`]'s working-set incoming-linker step
29//!   uses. A single store traversal with cheap presence-only matching, not N
30//!   whole-file parses; that is what keeps the unscoped call inside the loop
31//!   budget. [`backlinks`] then filters the raw hits to content files and emits
32//!   canonical bare targets (its relationship view), where the lower-level
33//!   [`Store::find_links_to`] returns every `.md` the text appears in.
34//! - **Scoped** (`--type` / `--in`): the candidate set is enumerated from the
35//!   relevant layer's `index.jsonl` sidecars — the sidecars of the one layer the
36//!   `--type` belongs to (via [`Store::sidecar_records`]), filtered to that type
37//!   — and each candidate is confirmed by a single-file parse. That is what makes
38//!   `--type` / `--in` an *I/O* scope, not just a result filter: a typed/layer-scoped
39//!   `backlinks` reads only the relevant layer's sidecars (O(entities-in-layer))
40//!   and parses only those files. A type's records can span several folders within
41//!   its layer (a `profile` filed under any `records/<folder>/`, not only its
42//!   canonical `records/profiles/`), so the read is layer-wide, not a single
43//!   canonical folder — otherwise off-canonical-folder linkers would be silently
44//!   dropped.
45//!
46//! **Why the scoped path confirms by parsing the candidate, not by trusting the
47//! sidecar's `links` field.** A sidecar record's `links` is the file's
48//! *frontmatter* `links:` list only — it does **not** capture wiki-links written
49//! in the body or inside other typed frontmatter fields (`company: [[…]]`,
50//! `attendees: [ … ]`, `derived_from: [ … ]`). [`forwardlinks`] extracts edges
51//! from the whole file, so to keep the two directions on the **same** edge set
52//! (an incoming edge to X is exactly: some file whose [`forwardlinks`] contains
53//! X) the incoming-edge confirmation re-parses each candidate file the same way.
54//! The sidecar bounds *which* files are candidates; the parse decides whether
55//! each truly links. The unscoped ripgrep path stays on that same edge set by
56//! matching the link text wherever it lives in the file (frontmatter or body).
57//! A node's `summary` / `type` likewise read frontmatter directly (the source of
58//! truth the sidecar is derived from; never stale).
59
60use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
61use std::io;
62use std::path::{Path, PathBuf};
63
64use ignore::WalkBuilder;
65
66use crate::index::IndexRecord;
67use crate::store::{
68    canonical_link_target, ensure_path_within_store, extract_edge_targets, fence_closes,
69    fence_opens, layer_for_type, link_edge_key, Layer, Store, StoreError,
70};
71
/// Which edge directions a traversal follows.
///
/// Derives `Hash` alongside `Eq` so a direction — or an edge tuple such as
/// `(PathBuf, Direction)` — can be used directly as a `HashSet` / `HashMap`
/// key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Direction {
    /// Incoming edges only (backlinks).
    Incoming,
    /// Outgoing edges only (forwardlinks).
    Outgoing,
    /// Both directions.
    Both,
}
82
83/// One node reached during a [`neighborhood`] hydration: the file, its
84/// `summary`, and how it connects back toward the seed.
85#[derive(Debug, Clone, PartialEq, Eq)]
86pub struct ContextNode {
87    /// The store-relative path of the reached file.
88    pub path: PathBuf,
89    /// The file's `summary` (read from its sidecar entry / frontmatter).
90    pub summary: String,
91    /// The file's `type`, when known.
92    pub type_: Option<String>,
93    /// Hop distance from the seed (the seed itself is 0).
94    pub hops: u32,
95    /// The relationship edge that brought this node into the slice: the path it
96    /// links to/from one hop closer to the seed, and the direction.
97    pub via: Option<(PathBuf, Direction)>,
98}
99
100/// The readable working-set digest [`neighborhood`] returns: the seed plus the
101/// reached nodes with their summaries and connections. The relationship-axis
102/// "turn a seed into context" primitive.
103#[derive(Debug, Clone, PartialEq, Eq)]
104pub struct ContextSlice {
105    /// The seed the slice was hydrated from.
106    pub seed: PathBuf,
107    /// The reached nodes (excluding the seed), in BFS order.
108    pub nodes: Vec<ContextNode>,
109}
110
111/// Incoming edges to `path`: files that wiki-link to it. The blast-radius /
112/// dependents primitive before an edit. Store-wide (every layer / every type);
113/// see [`backlinks_filtered`] for the `--type` / `--in`-scoped form.
114///
115/// `path` is the store-relative target as it would be written inside a
116/// wiki-link (with or without a trailing `.md`; both resolve to the same
117/// target). Returns each linking file as its **canonical bare wiki-link path**
118/// (store-relative, no `.md`) — the same key [`forwardlinks`] emits, so the two
119/// directions round-trip and [`neighborhood`] can use one node identity.
120/// Deduped, sorted, never including the seed itself.
121pub fn backlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
122    backlinks_filtered(store, path, &[], None)
123}
124
125/// Incoming edges to `path`, scoped by the linking file's `type` and/or layer —
126/// the `dbmd graph backlinks --type/--in` surface.
127///
128/// **Scale (the loop contract).** Two paths, by whether the call is scoped:
129///
130/// - **Unscoped** (`types` empty *and* `layer` `None`): one embedded-ripgrep
131///   pass for `[[<target>]]` across the store via [`Store::find_links_to`] — a
132///   single `grep` + `ignore` traversal with early-exit per file, never a
133///   `read_to_string` of every content file. This is the same scan engine
134///   [`crate::validate::validate_working_set`]'s incoming-linker step rides, and
135///   it keeps the unscoped call inside the loop budget (the old per-candidate
136///   confirm-read re-opened every file in the store → O(store)).
137/// - **Scoped** (`types` and/or `layer` set): the candidate set — the files that
138///   *might* link to `path` — is read from the relevant layer's `index.jsonl`
139///   sidecars, so the call touches only the named layer(s): O(entities-in-layer),
140///   the sanctioned loop cost. Each candidate is then confirmed by a single-file
141///   parse. When `types` lists several types, the sidecars of each type's layer
142///   are read and the candidate sets unioned (filtered to the type), so a type
143///   whose records span multiple folders within its layer (e.g. a `profile` filed
144///   under any `records/<folder>/`) is fully covered; a `layer` further restricts
145///   the candidate paths to that layer.
146///
147/// **Correctness (one edge set, both paths).** An incoming edge to X is exactly:
148/// some file whose [`forwardlinks`] contains X — a wiki-link in the body or in
149/// *any* frontmatter field (`company: [[…]]`, `attendees: [ … ]`), not just the
150/// sidecar's frontmatter `links:` projection. Both paths honor that:
151/// - The unscoped scan matches the literal `[[<target>]]` text wherever it lives
152///   in a file (frontmatter or body), the same edges [`forwardlinks`] extracts.
153///   [`Store::find_links_to`] returns *every* `.md` carrying the link text
154///   (including `index.md` catalogs); [`backlinks`] is the relationship view, so
155///   the results are filtered to content files ([`is_content_rel`]) and emitted
156///   as canonical bare targets, self-excluded.
157/// - The scoped path confirms each candidate via [`file_links_to`], which
158///   delegates to [`forwardlinks`] (body + every frontmatter field) — so a
159///   body-only or typed-field edge is caught, not just the sidecar's `links:`
160///   list.
161///
162/// Result form (canonical bare paths, deduped, sorted, seed excluded) is
163/// identical on both paths and matches [`backlinks`].
164pub fn backlinks_filtered(
165    store: &Store,
166    path: &Path,
167    types: &[String],
168    layer: Option<Layer>,
169) -> Result<Vec<PathBuf>, StoreError> {
170    let target = normalize_target(path);
171    if target.is_empty() {
172        return Ok(Vec::new());
173    }
174    let target_key = edge_key(&target);
175
176    // Unscoped: one content pass over the store (O(store) scan with early-exit
177    // per file), not a per-candidate read of every content file. `find_links_to`
178    // returns every `.md` carrying an edge to the target (incl. catalog
179    // `index.md`); narrow to content files and canonicalize to the bare target
180    // form `backlinks` emits, dropping the seed's self-link.
181    if types.is_empty() && layer.is_none() {
182        let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
183        for rel in store.find_links_to(path)? {
184            if !is_content_rel(&rel) {
185                continue;
186            }
187            let linker = normalize_target(&rel);
188            if linker.is_empty() || edge_key(&linker) == target_key {
189                // A file never counts as its own backlink (case-folded so a
190                // case-variant self-link is still excluded).
191                continue;
192            }
193            hits.insert(PathBuf::from(linker));
194        }
195        return Ok(hits.into_iter().collect());
196    }
197
198    // Scoped: read only the named folder(s)' sidecars for the candidate set, then
199    // confirm each candidate with a single-file parse — O(folder), the I/O scope
200    // `--type` / `--in` buys.
201    let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
202    for candidate in candidate_records(store, types, layer)? {
203        let rel = &candidate.path;
204        let candidate_target = normalize_target(rel);
205        if candidate_target.is_empty() || edge_key(&candidate_target) == target_key {
206            // A file never counts as its own backlink.
207            continue;
208        }
209        // Confirm the edge by parsing the candidate file the same way
210        // forwardlinks does (body + all frontmatter), so body/typed-field links
211        // are caught — the sidecar's `links` field alone would miss them.
212        if file_links_to(store, rel, &target)? {
213            hits.insert(PathBuf::from(candidate_target));
214        }
215    }
216
217    Ok(hits.into_iter().collect())
218}
219
220/// Outgoing edges from `path`: the wiki-link targets extracted from that single
221/// file. Loop-fast; follow the evidence chain.
222///
223/// `path` is the store-relative path of the file to read. Targets are returned
224/// as store-relative paths (bare, no `.md`), deduped and sorted; the file's
225/// links to itself are dropped. A missing file yields an empty list (a
226/// dangling seed has no outgoing edges to report — broken-link detection is
227/// [`crate::validate`]'s job).
228pub fn forwardlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
229    let self_key = edge_key(&normalize_target(path));
230    let abs = match resolve_existing(store, path) {
231        Some(a) => a,
232        None => return Ok(Vec::new()),
233    };
234    let body = match std::fs::read_to_string(&abs) {
235        Ok(b) => b,
236        // A file that isn't valid UTF-8 (e.g. a binary source) carries no
237        // wiki-links we can extract.
238        Err(e) if e.kind() == io::ErrorKind::InvalidData => return Ok(Vec::new()),
239        Err(e) => return Err(StoreError::Io(e)),
240    };
241
242    let mut out: BTreeSet<PathBuf> = BTreeSet::new();
243    for target in extract_link_targets(&body) {
244        // Self-link drop is case-folded so a case-variant self-reference is also
245        // excluded on a case-insensitive filesystem.
246        if target.is_empty() || edge_key(&target) == self_key {
247            continue;
248        }
249        out.insert(PathBuf::from(target));
250    }
251    Ok(out.into_iter().collect())
252}
253
254/// The candidate set for an incoming-edge scan: the sidecar records that could
255/// link to the target, read from the type-folder `index.jsonl` sidecars (never
256/// a content-tree walk). `types`/`layer` narrow *which* sidecars are read — the
257/// I/O scope that keeps a typed/layer backlinks O(entities-in-layer).
258///
259/// - `types` non-empty: for each type, read **the whole layer** the type belongs
260///   to ([`layer_for_type`] → [`Store::sidecar_records`]) and keep the records of
261///   that `type`, unioned by path across the requested types. A `layer` filter,
262///   when given, intersects with the type's own layer (a type lives in exactly
263///   one layer, so a mismatched `--in` simply yields no candidates).
264/// - `types` empty: every sidecar record under `layer` (or store-wide when
265///   `None`) via [`Store::sidecar_records`].
266///
267/// **Why the whole layer, not just the type's canonical folder.** A `type` can
268/// legitimately span several folders within one layer — a conclusion `profile`
269/// is the canonical case (it lives under `records/profiles/` by default, but an
270/// agent may file one under any other `records/<folder>/`: `records/people/`,
271/// `records/projects/`, …). Reading only the single canonical-guess folder
272/// (`records/profiles/`) would silently drop every profile filed elsewhere in the
273/// layer, so a scoped `backlinks --type profile` would under-report dependents the
274/// moment that canonical folder exists — breaking the docstring's promise that the
275/// scoped edge set equals the unscoped one. Reading the type's full layer subtree
276/// and filtering by `type` is complete and still O(entities-in-layer), the
277/// sanctioned loop scope.
278fn candidate_records(
279    store: &Store,
280    types: &[String],
281    layer: Option<Layer>,
282) -> Result<Vec<IndexRecord>, StoreError> {
283    if types.is_empty() {
284        return store.sidecar_records(layer);
285    }
286    let mut by_path: std::collections::BTreeMap<PathBuf, IndexRecord> =
287        std::collections::BTreeMap::new();
288    for type_ in types {
289        // A type lives in exactly one layer; read that whole layer's sidecars so
290        // a record filed under a non-canonical folder of the same type (e.g. a
291        // `profile` under `records/people/` rather than `records/profiles/`) is
292        // still a candidate. An explicit `--in` layer that disagrees with the type's
293        // layer can never match the type, so skip the read entirely.
294        let type_layer = layer_for_type(type_);
295        if let Some(scope) = layer {
296            if scope != type_layer {
297                continue;
298            }
299        }
300        for rec in store.sidecar_records(Some(type_layer))? {
301            if rec.type_ == *type_ {
302                by_path.insert(rec.path.clone(), rec);
303            }
304        }
305    }
306    Ok(by_path.into_values().collect())
307}
308
309/// True if the store file at `rel` carries a wiki-link whose canonical target
310/// equals `target`. Delegates to [`forwardlinks`] so the incoming-edge predicate
311/// is *exactly* the outgoing-edge extraction — body + every frontmatter field —
312/// keeping the two directions on one edge set. `forwardlinks` already emits
313/// canonical bare targets, so `target` (likewise normalized by the caller) is
314/// compared directly. A missing/binary file links to nothing.
315fn file_links_to(store: &Store, rel: &Path, target: &str) -> Result<bool, StoreError> {
316    let edges = forwardlinks(store, rel)?;
317    let target_key = edge_key(target);
318    // Compare on the case-folded edge key so a case-variant link (e.g.
319    // `[[records/contacts/Sarah-Chen]]` to `sarah-chen.md`) is confirmed on a
320    // case-insensitive filesystem, agreeing with the unscoped scan and validate.
321    Ok(edges
322        .iter()
323        .any(|e| edge_key(&e.to_string_lossy()) == target_key))
324}
325
326/// **Context hydration.** Bounded BFS from `seed` over backlinks + forwardlinks
327/// out to `hops`, reading each reached file's `summary` + relationship, and
328/// returning a readable [`ContextSlice`]. Optionally filtered by `types` and
329/// `direction`. On-demand; no maintained graph. What the agent reaches for to
330/// assemble a working set in one call.
331///
332/// Traversal semantics:
333/// - **`hops`** bounds true graph distance from the seed. `hops == 0` returns
334///   an empty slice (the seed alone is no context).
335/// - **`direction`** selects which edges are followed: `Incoming` walks
336///   backlinks, `Outgoing` walks forwardlinks, `Both` walks the union.
337/// - **`types`**, when non-empty, filters which reached nodes appear in the
338///   slice — but traversal still passes *through* off-type nodes, so a
339///   `meeting` two hops out is still reachable through a `contact` even when
340///   filtering to `meeting`. (An empty `types` slice imposes no filter.)
341/// - Each node records the lowest hop count at which it is first reached (BFS
342///   order); the seed is never included as a node.
343///
344/// Unbounded traversal: delegates to [`neighborhood_capped`] with no node cap, so
345/// it expands every reachable node within `hops`. For a densely-interlinked store
346/// this is one full-store backlinks scan **per reached node** (O(visited × store))
347/// — prefer [`neighborhood_capped`] with a `max_nodes` cap to bound that work.
348pub fn neighborhood(
349    store: &Store,
350    seed: &Path,
351    hops: u32,
352    types: &[String],
353    direction: Direction,
354) -> Result<ContextSlice, StoreError> {
355    neighborhood_capped(store, seed, hops, types, direction, None)
356}
357
358/// [`neighborhood`] with a hard cap on how many nodes the BFS **traverses**.
359///
360/// `max_nodes` bounds the *traversal*, not just the result: each node the BFS
361/// expands triggers a per-node incoming-edge scan (an unscoped [`backlinks`] is a
362/// full-store ripgrep pass), so an uncapped neighborhood of a hub node costs
363/// O(visited × store). A post-hoc `.take(n)` on the returned nodes caps the
364/// *output* but not that work — the scans still run for every reached node. This
365/// cap stops discovering (and therefore stops scanning) once `max_nodes` distinct
366/// non-seed nodes have entered the BFS, so the expensive per-node scans are bounded
367/// to at most `max_nodes` of them. `None` is unbounded (the [`neighborhood`]
368/// behavior).
369///
370/// The cap is applied at *discovery* in BFS order, so the kept nodes are exactly
371/// the first `max_nodes` reached (closest-first by hop), and each still records its
372/// true minimum hop distance. Type-filtered (off-type) nodes count against the cap
373/// because the BFS must still traverse *through* them to reach deeper on-type
374/// nodes — the scan cost is paid when a node is expanded, on- or off-type alike.
375pub fn neighborhood_capped(
376    store: &Store,
377    seed: &Path,
378    hops: u32,
379    types: &[String],
380    direction: Direction,
381    max_nodes: Option<usize>,
382) -> Result<ContextSlice, StoreError> {
383    let seed_rel = PathBuf::from(normalize_target(seed));
384    let type_filter: HashSet<&str> = types.iter().map(|s| s.as_str()).collect();
385
386    // `discovered` guards against revisiting a node (and against re-adding the
387    // seed). BFS by levels so the first time we reach a node is its true min
388    // hop distance.
389    let mut discovered: HashSet<PathBuf> = HashSet::new();
390    discovered.insert(seed_rel.clone());
391
392    let mut nodes: Vec<ContextNode> = Vec::new();
393    let mut frontier: VecDeque<PathBuf> = VecDeque::new();
394    frontier.push_back(seed_rel.clone());
395
396    // Count of distinct non-seed nodes admitted to the BFS. Once it hits
397    // `max_nodes` we stop discovering new nodes, which stops enqueuing them, which
398    // stops the per-node full-store backlinks scan they would have triggered — the
399    // cap bounds the *traversal cost*, not only the printed result.
400    let mut admitted = 0usize;
401    let cap_reached = |admitted: usize| max_nodes.is_some_and(|cap| admitted >= cap);
402
403    let mut hop = 0u32;
404    while hop < hops && !frontier.is_empty() && !cap_reached(admitted) {
405        hop += 1;
406        let level_size = frontier.len();
407        for _ in 0..level_size {
408            if cap_reached(admitted) {
409                break;
410            }
411            let current = frontier.pop_front().expect("frontier non-empty");
412
413            // Collect this node's edges in the requested direction(s). Each
414            // edge carries the neighbor path + the direction we traversed it.
415            let mut edges: Vec<(PathBuf, Direction)> = Vec::new();
416            if matches!(direction, Direction::Outgoing | Direction::Both) {
417                for nbr in forwardlinks(store, &current)? {
418                    edges.push((nbr, Direction::Outgoing));
419                }
420            }
421            if matches!(direction, Direction::Incoming | Direction::Both) {
422                for nbr in backlinks(store, &current)? {
423                    edges.push((nbr, Direction::Incoming));
424                }
425            }
426
427            for (neighbor, dir) in edges {
428                if cap_reached(admitted) {
429                    break;
430                }
431                if !discovered.insert(neighbor.clone()) {
432                    continue;
433                }
434                admitted += 1;
435                let (summary, type_) = read_summary_and_type(store, &neighbor);
436                let include = type_filter.is_empty()
437                    || type_
438                        .as_deref()
439                        .map(|t| type_filter.contains(t))
440                        .unwrap_or(false);
441                if include {
442                    nodes.push(ContextNode {
443                        path: neighbor.clone(),
444                        summary,
445                        type_,
446                        hops: hop,
447                        via: Some((current.clone(), dir)),
448                    });
449                }
450                // Off-type nodes are not emitted but still seed the next BFS
451                // level, so the type filter narrows the *result*, not the
452                // reachable graph.
453                frontier.push_back(neighbor);
454            }
455        }
456    }
457
458    Ok(ContextSlice {
459        seed: seed_rel,
460        nodes,
461    })
462}
463
464/// **SWEEP.** Content files with no incoming AND no outgoing wiki-links — the
465/// curation worklist ("ingested but not yet wired into the wiki"). Off the
466/// loop. Optionally scoped to a layer.
467///
468/// A file is an orphan iff it neither links out to another store file nor is
469/// linked to by one. Incoming edges are counted across the *whole* store
470/// (a link from any layer un-orphans a file), even when `layer` scopes the
471/// candidate set. Returns store-relative paths, sorted.
472pub fn orphans(store: &Store, layer: Option<Layer>) -> Result<Vec<PathBuf>, StoreError> {
473    // One walk of the whole store: for every content file, record (a) whether
474    // it has any outgoing link, and (b) accumulate the set of every target any
475    // file links to (its incoming-edge set). Both come from a single read per
476    // file — the SWEEP cost.
477    let all = walk_content_files(store)?;
478
479    // `linked_to` holds case-folded edge KEYS (not raw paths): the link text may
480    // spell a target with different casing than the on-disk file (e.g.
481    // `[[records/contacts/Sarah-Chen]]` → `sarah-chen.md`), and on a
482    // case-insensitive filesystem that is a real incoming edge. Keying on
483    // `edge_key` so the incoming-edge lookup case-folds is what stops the
484    // false-positive orphan (a file with a live case-variant link reported as
485    // orphaned) — and matches validate, which resolves the same link via the
486    // case-insensitive filesystem.
487    let mut linked_to: HashSet<String> = HashSet::new();
488    let mut has_outgoing: HashMap<PathBuf, bool> = HashMap::new();
489
490    for abs in &all {
491        let rel = match rel_path(store, abs) {
492            Some(r) => r,
493            None => continue,
494        };
495        let self_key = edge_key(&normalize_target(&rel));
496
497        let body = match std::fs::read_to_string(abs) {
498            Ok(b) => b,
499            Err(e) if e.kind() == io::ErrorKind::InvalidData => String::new(),
500            Err(e) => return Err(StoreError::Io(e)),
501        };
502
503        let mut outgoing = false;
504        for target in extract_link_targets(&body) {
505            if target.is_empty() || edge_key(&target) == self_key {
506                continue;
507            }
508            if resolve_existing(store, Path::new(&target)).is_none() {
509                continue;
510            }
511            outgoing = true;
512            linked_to.insert(edge_key(&target));
513        }
514        has_outgoing.insert(rel, outgoing);
515    }
516
517    let mut out: BTreeSet<PathBuf> = BTreeSet::new();
518    for abs in &all {
519        let rel = match rel_path(store, abs) {
520            Some(r) => r,
521            None => continue,
522        };
523        if let Some(layer) = layer {
524            if path_layer(&rel) != Some(layer) {
525                continue;
526            }
527        }
528        let outgoing = has_outgoing.get(&rel).copied().unwrap_or(false);
529        let incoming = linked_to.contains(&edge_key(&normalize_target(&rel)));
530        if !outgoing && !incoming {
531            out.insert(rel);
532        }
533    }
534
535    Ok(out.into_iter().collect())
536}
537
538/// **Write-side.** Rewrite every incoming `[[old]]` wiki-link in `text` to
539/// `[[new]]`, preserving any `|display` override and emitting the canonical bare
540/// target (no `.md`). The write-side twin of [`backlinks`]: where `backlinks`
541/// *finds* the files carrying an edge to `old`, this *retargets* that edge to
542/// `new` inside one file's contents.
543///
544/// `old` and `new` are store-relative paths in the wiki-link sense — both are
545/// passed through the same [`normalize_target`] the read side keys on, so the
546/// `.md` and bare spellings of `old` collapse to one target and a match here is
547/// exactly a match [`backlinks`] / [`Store::find_links_to`](crate::Store::find_links_to)
548/// would report. A link is rewritten iff its normalized target equals
549/// `normalize_target(old)`; prefix collisions (`old=a/b` vs `[[a/bc]]`) and
550/// short-form links never match. Returns the rewritten text (identical to the
551/// input when nothing matched), so the caller can cheaply detect a no-op.
552///
553/// Operates on the raw text (not a parser round-trip) so a link in frontmatter
554/// or body is retargeted uniformly and nothing else is reflowed — **except** a
555/// `[[...]]` inside a ``` fenced code block, which is a documentation example,
556/// not an edge: `rename` must NOT mutate fenced verbatim content (validate
557/// treats fenced links as non-edges, so rewriting them silently corrupts the
558/// example and makes rename disagree with validate). Matching is fence-aware,
559/// whitespace-trimmed, and case-folded to the filesystem, the exact edge notion
560/// [`backlinks`]/[`forwardlinks`] use — so rename retargets precisely the edges
561/// those report and nothing else.
562pub fn rewrite_links_to(text: &str, old: &Path, new: &Path) -> String {
563    let old_target = normalize_target(old);
564    let new_target = normalize_target(new);
565    if old_target.is_empty() {
566        // No target to match → never rewrite anything.
567        return text.to_string();
568    }
569    let old_key = edge_key(&old_target);
570
571    let mut out = String::with_capacity(text.len());
572    // Track the fence as a `(char, run length)` exactly like validate and
573    // `extract_edge_targets` (NOT a bool toggled on any ``` / ~~~ line). The
574    // naive toggle flips mid-block on a nested/indented/long-run fence, so a
575    // fenced example link would be rewritten — corrupting documentation and
576    // making rename disagree with validate's edge notion.
577    let mut fence: Option<(u8, usize)> = None;
578    // `split_inclusive` keeps each line's trailing `\n`, so copying a chunk
579    // verbatim preserves the original line endings exactly.
580    for line in text.split_inclusive('\n') {
581        // The fence rules key on line content without trailing `\r`/`\n`; the
582        // full chunk (line endings intact) is what we copy verbatim.
583        let content = line.trim_end_matches('\n').trim_end_matches('\r');
584        if let Some(f) = fence {
585            // Inside a fenced code block: copy verbatim, never rewrite. Only a
586            // matching closing fence ends the block.
587            if fence_closes(content, f) {
588                fence = None;
589            }
590            out.push_str(line);
591            continue;
592        }
593        if let Some(opened) = fence_opens(content) {
594            fence = Some(opened);
595            out.push_str(line);
596            continue;
597        }
598        rewrite_links_in_line(line, &old_key, &new_target, &mut out);
599    }
600    out
601}
602
603/// Rewrite every `[[...]]` on a single (non-fenced) line whose target matches
604/// `old_key`, appending the result to `out`. Preserves any `|display` override
605/// verbatim and emits the canonical bare `new_target`. A `[[...]]` whose target
606/// does not match (a prefix sibling, the short form, an unrelated target) is
607/// copied through untouched.
608fn rewrite_links_in_line(line: &str, old_key: &str, new_target: &str, out: &mut String) {
609    let bytes = line.as_bytes();
610    let mut i = 0usize;
611    let mut last = 0usize;
612    while i + 1 < bytes.len() {
613        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
614            if let Some(close) = line[i + 2..].find("]]") {
615                let inner = &line[i + 2..i + 2 + close];
616                // An embedded newline means this isn't a single-line link.
617                if !inner.contains('\n') {
618                    let (raw_target, display) = match inner.split_once('|') {
619                        Some((t, d)) => (t, Some(d)),
620                        None => (inner, None),
621                    };
622                    let raw_target = raw_target.trim();
623                    // Match on the SAME edge key the read side uses, so `[[old]]`,
624                    // `[[old.md]]`, `[[ ./old ]]`, and (case-insensitive FS)
625                    // `[[Old]]` all retarget while `[[old-jr]]` never does.
626                    if !raw_target.is_empty()
627                        && !raw_target.starts_with('[')
628                        && edge_key(&canonical_link_target(raw_target)) == old_key
629                    {
630                        out.push_str(&line[last..i]);
631                        out.push_str("[[");
632                        out.push_str(new_target);
633                        if let Some(display) = display {
634                            out.push('|');
635                            out.push_str(display);
636                        }
637                        out.push_str("]]");
638                        i = i + 2 + close + 2;
639                        last = i;
640                        continue;
641                    }
642                }
643                // Not a matching link: skip past this `]]` so an inner `[[`
644                // isn't re-scanned, but leave the text for the verbatim copy.
645                i = i + 2 + close + 2;
646                continue;
647            }
648        }
649        i += 1;
650    }
651    out.push_str(&line[last..]);
652}
653
654// ── Private helpers ─────────────────────────────────────────────────────────
655
656/// Normalize a store-relative path into the canonical wiki-link target form:
657/// forward slashes, no leading `./` or `/`, and no trailing `.md`. This is the
658/// canonical (case-PRESERVING) identity used for output and rewrites; edge
659/// *comparisons* go through [`edge_key`] so the `.md`/bare forms AND (on a
660/// case-insensitive filesystem) case-variant spellings of a target unify. The
661/// shared [`canonical_link_target`] is the single definition every db.md
662/// link op keys on.
663fn normalize_target(path: &Path) -> String {
664    canonical_link_target(&path.to_string_lossy())
665}
666
/// Comparison key for an edge, derived from an already-canonical target.
///
/// Thin local alias for the shared [`link_edge_key`]. Per that helper's
/// contract the key is the canonical target case-folded to the filesystem
/// (identity on a case-sensitive FS, lowercased on macOS/Windows), so the
/// string-keyed graph comparisons agree with the filesystem's
/// case-insensitive `is_file()` resolution. `[[records/contacts/Sarah-Chen]]`
/// and the on-disk `sarah-chen.md` must be the same edge on a
/// case-insensitive filesystem, or backlinks/orphans/rename silently disagree
/// with validate.
///
/// `canonical_target` is expected to be the output of
/// [`canonical_link_target`] / [`normalize_target`], not a raw link spelling.
fn edge_key(canonical_target: &str) -> String {
    link_edge_key(canonical_target)
}
676
677/// Extract every wiki-link target from a body, normalized to the canonical
678/// store-relative form. Fence-aware and whitespace-trimmed via the shared
679/// [`extract_edge_targets`] — a `[[...]]` inside a ``` fenced code block is a
680/// documentation example, NOT an edge (matching validate), and `[[ x ]]`
681/// padding resolves identically to `[[x]]`. A target that would escape the store
682/// root (a `..` component) is dropped here too, so an escaping `[[../outside/x]]`
683/// is never reported as a forward edge and never seeds a [`neighborhood`]
684/// traversal out of the store (the disclosure vector validate flags as an
685/// error). Order-preserving; duplicates kept (callers dedup).
686fn extract_link_targets(body: &str) -> Vec<String> {
687    extract_edge_targets(body)
688        .into_iter()
689        .filter(|t| is_within_store_target(t))
690        .collect()
691}
692
/// Whether a canonical target stays inside the store.
///
/// Returns `true` only when every path component is a plain name
/// (`Component::Normal`). Any `..`, root, prefix, or leading `.` component
/// makes the target unsafe as a store-relative key and yields `false`. An
/// empty target has no components and is therefore accepted. Callers pass
/// canonical targets, which have already lost any leading `./` or `/`.
fn is_within_store_target(target: &str) -> bool {
    for part in Path::new(target).components() {
        if let std::path::Component::Normal(_) = part {
            continue;
        }
        // First non-plain component decides it: this target is not a safe key.
        return false;
    }
    true
}
702
703/// Resolve the store root + a store-relative path to the absolute on-disk file,
704/// trying the path as written and then with a `.md` extension. `None` if neither
705/// exists **or if the target resolves outside the store root** — a `..`-laden or
706/// symlink-escaping wiki-link must never turn a graph read/traversal into a read
707/// of an arbitrary file outside the store (the `dbmd graph neighborhood`
708/// disclosure vector). Containment is enforced via the shared
709/// [`ensure_path_within_store`] gate, matching validate's safe-path guard.
710fn resolve_existing(store: &Store, store_relative: &Path) -> Option<PathBuf> {
711    let direct = store.root.join(store_relative);
712    if direct.is_file() && resolves_within_store(store, store_relative, &direct) {
713        return Some(direct);
714    }
715    let normalized = normalize_target(store_relative);
716    let with_md = store.root.join(format!("{normalized}.md"));
717    if with_md.is_file() && resolves_within_store(store, Path::new(&normalized), &with_md) {
718        return Some(with_md);
719    }
720    None
721}
722
723/// Containment check for a candidate on-disk path, with a cheap fast path. A
724/// store-relative path made of only `Normal` components (no `..`, no absolute /
725/// platform prefix) is trivially inside the root, so the common case avoids the
726/// `canonicalize` syscalls entirely. Anything with a `..`/absolute/prefix
727/// component falls through to the authoritative [`ensure_path_within_store`]
728/// gate (symlink-resolving), which is the only thing that can prove an escaping
729/// or symlink-redirected path actually stays inside the store.
730fn resolves_within_store(store: &Store, store_relative: &Path, abs: &Path) -> bool {
731    let plain_relative = !store_relative.is_absolute()
732        && store_relative
733            .components()
734            .all(|c| matches!(c, std::path::Component::Normal(_)));
735    if plain_relative {
736        return true;
737    }
738    ensure_path_within_store(&store.root, abs).is_ok()
739}
740
741/// Convert an absolute path under the store root into its store-relative form.
742fn rel_path(store: &Store, abs: &Path) -> Option<PathBuf> {
743    abs.strip_prefix(&store.root).ok().map(|p| p.to_path_buf())
744}
745
746/// Which layer a store-relative path sits in, by its first component.
747fn path_layer(rel: &Path) -> Option<Layer> {
748    let first = rel.components().next()?;
749    match first.as_os_str().to_str()? {
750        "sources" => Some(Layer::Sources),
751        "records" => Some(Layer::Records),
752        _ => None,
753    }
754}
755
756/// True if a store-relative path is a *content* file: under `sources/` or
757/// `records/`, a `.md` file, and not an `index.md`. Meta files
758/// (`DB.md`, `log.md`, `log/…`, sidecars) are excluded.
759fn is_content_rel(rel: &Path) -> bool {
760    if path_layer(rel).is_none() {
761        return false;
762    }
763    match rel.extension().and_then(|e| e.to_str()) {
764        Some("md") => {}
765        _ => return false,
766    }
767    rel.file_name().and_then(|n| n.to_str()) != Some("index.md")
768}
769
770/// Walk every content `.md` file in the store via the **`ignore`** walker
771/// (the ripgrep directory engine). Only the two layer roots
772/// (`sources/`/`records/`) are descended, so `DB.md`, `log.md`, and
773/// `log/` at the store root are structurally never reached; hidden dirs and
774/// per-folder `index.md` sidecars are filtered out ([`is_content_rel`]). Honors
775/// `.gitignore` the way `rg` does. Returns absolute paths. SWEEP-class.
776fn walk_content_files(store: &Store) -> Result<Vec<PathBuf>, StoreError> {
777    let mut out = Vec::new();
778    for layer in Layer::all() {
779        let dir = store.root.join(layer_dir_name(layer));
780        if !dir.is_dir() {
781            continue;
782        }
783        let walker = WalkBuilder::new(&dir)
784            .hidden(true)
785            .git_ignore(true)
786            .git_global(false)
787            .require_git(false)
788            // Follow symlinks so a symlinked `.md` content file or a symlinked
789            // type folder is walked like any other content (consistent with the
790            // store SWEEP walker), rather than silently vanishing from orphans.
791            .follow_links(true)
792            .build();
793        for result in walker {
794            let entry = result.map_err(|e| StoreError::Search {
795                root: store.root.clone(),
796                message: format!("walk failed: {e}"),
797            })?;
798            // A followed symlink entry reports its own type as `is_symlink()`, so
799            // also accept a symlink whose target is a regular file.
800            let is_file = match entry.file_type() {
801                Some(ft) if ft.is_file() => true,
802                Some(ft) if ft.is_symlink() => std::fs::metadata(entry.path())
803                    .map(|m| m.is_file())
804                    .unwrap_or(false),
805                _ => false,
806            };
807            if !is_file {
808                continue;
809            }
810            let abs = entry.into_path();
811            if let Some(rel) = rel_path(store, &abs) {
812                if is_content_rel(&rel) {
813                    out.push(abs);
814                }
815            }
816        }
817    }
818    Ok(out)
819}
820
/// The on-disk folder name for a layer: `"sources"` for [`Layer::Sources`],
/// `"records"` for [`Layer::Records`].
///
/// Mirrors `Layer::dir_name`; kept local so the graph module owns its own copy
/// rather than coupling to that body. The match is deliberately exhaustive
/// (no `_` arm), so adding a `Layer` variant is a compile error here until its
/// folder name is supplied.
fn layer_dir_name(layer: Layer) -> &'static str {
    match layer {
        Layer::Sources => "sources",
        Layer::Records => "records",
    }
}
829
830/// Read a reached node's `summary` and `type` from its frontmatter. A missing
831/// file, missing frontmatter, or unparseable YAML degrades to an empty summary
832/// / unknown type rather than failing the whole hydration — `neighborhood` is
833/// best-effort context assembly, not validation.
834fn read_summary_and_type(store: &Store, rel: &Path) -> (String, Option<String>) {
835    let abs = match resolve_existing(store, rel) {
836        Some(a) => a,
837        None => return (String::new(), None),
838    };
839    let text = match std::fs::read_to_string(&abs) {
840        Ok(t) => t,
841        Err(_) => return (String::new(), None),
842    };
843    let yaml = match frontmatter_block(&text) {
844        Some(y) => y,
845        None => return (String::new(), None),
846    };
847    let value: serde_norway::Value = match serde_norway::from_str(yaml) {
848        Ok(v) => v,
849        Err(_) => return (String::new(), None),
850    };
851    let summary = value
852        .get("summary")
853        .and_then(|v| v.as_str())
854        .unwrap_or("")
855        .to_string();
856    let type_ = value
857        .get("type")
858        .and_then(|v| v.as_str())
859        .map(|s| s.to_string());
860    (summary, type_)
861}
862
/// Slice out the YAML frontmatter of a document, fences excluded.
///
/// The text must begin (after at most one UTF-8 BOM) with a `---` line ended by
/// `\n` or `\r\n`. The block runs up to, but not including, the first later
/// line that is exactly `---` once its trailing `\r`/`\n` are removed. The
/// returned slice keeps the line endings of the YAML lines themselves.
///
/// Returns `None` when there is no opening fence or no closing fence. Local
/// mirror of the parser's split so the graph module stays self-contained.
fn frontmatter_block(text: &str) -> Option<&str> {
    // Tolerate a single leading UTF-8 BOM, matching parser/store/index/validate.
    let text = text.strip_prefix('\u{feff}').unwrap_or(text);

    let after_open = match text.strip_prefix("---\n") {
        Some(rest) => rest,
        None => text.strip_prefix("---\r\n")?,
    };

    // Walk the lines while tracking each line's starting byte offset; the
    // offset of the closing fence is exactly where the YAML ends.
    let mut next_start = 0usize;
    after_open.split_inclusive('\n').find_map(|line| {
        let line_start = next_start;
        next_start += line.len();
        let is_closing_fence = line.trim_end_matches(['\r', '\n']) == "---";
        is_closing_fence.then(|| &after_open[..line_start])
    })
}
883
884#[cfg(test)]
885mod tests {
886    use super::*;
887    use std::fs;
888    use tempfile::TempDir;
889
890    use crate::parser::Config;
891
892    // ── Fixture builder ─────────────────────────────────────────────────────
893    //
894    // A real on-disk store in a tempdir. We write actual files (frontmatter +
895    // wiki-links) and exercise the real code paths. The fixture constructs the
896    // `Store` by its public fields rather than `Store::open`, so the graph
897    // tests stand on their own and do not depend on any other module's
898    // behavior. Each test asserts the behavior the SPEC promises, derived from
899    // intent, never from echoing the function's own output.
900    //
901    // `backlinks` (and `neighborhood` in any incoming direction) enumerate their
902    // candidate set from the type-folder `index.jsonl` sidecars — the loop
903    // contract: never a whole-store content walk. A real db.md store maintains
904    // those sidecars write-through, so a test that exercises backlinks must call
905    // [`Fixture::reindex`] after writing its files to build them (the SWEEP that
906    // `dbmd index rebuild` runs). Forwardlinks/orphans read content directly and
907    // need no sidecar.
908
    /// A throwaway on-disk store for a single test.
    struct Fixture {
        /// Owns the temporary directory the store lives in. Never read (hence
        /// the `_` prefix); presumably held so the directory is not cleaned up
        /// while `store` is still in use.
        _tmp: TempDir,
        /// The store under test, rooted at the temporary directory.
        store: Store,
    }
913
914    impl Fixture {
915        fn new() -> Self {
916            let tmp = TempDir::new().expect("tempdir");
917            let root = tmp.path().to_path_buf();
918            fs::write(root.join("DB.md"), "---\ntype: db-md\n---\n# store\n").expect("DB.md");
919            let store = Store {
920                root,
921                config: Config::default(),
922            };
923            Fixture { _tmp: tmp, store }
924        }
925
926        /// Write a content file at a store-relative path with the given type,
927        /// summary, and body. Creates parent dirs.
928        fn write(&self, rel: &str, type_: &str, summary: &str, body: &str) {
929            let abs = self.store.root.join(rel);
930            fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
931            let contents = format!(
932                "---\ntype: {type_}\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: {summary}\n---\n{body}\n"
933            );
934            fs::write(&abs, contents).expect("write file");
935        }
936
937        /// Write a raw file verbatim (for frontmatter-shape edge cases).
938        fn write_raw(&self, rel: &str, contents: &str) {
939            let abs = self.store.root.join(rel);
940            fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
941            fs::write(&abs, contents).expect("write raw");
942        }
943
944        /// Build the type-folder `index.jsonl` sidecars from the content written
945        /// so far — the state a real store is always in (write-through), and the
946        /// candidate set `backlinks` reads. Call after writing files in any test
947        /// that exercises `backlinks` or an incoming-direction `neighborhood`.
948        fn reindex(&self) {
949            crate::index::Index::rebuild_all(&self.store).expect("rebuild sidecars");
950        }
951
952        fn p(&self, rel: &str) -> PathBuf {
953            PathBuf::from(rel)
954        }
955    }
956
957    fn paths(v: &[PathBuf]) -> Vec<String> {
958        v.iter()
959            .map(|p| p.to_string_lossy().replace('\\', "/"))
960            .collect()
961    }
962
963    // ── normalize_target ────────────────────────────────────────────────────
964
965    #[test]
966    fn normalize_strips_md_and_leading_dotslash() {
967        assert_eq!(
968            normalize_target(Path::new("records/contacts/sarah.md")),
969            "records/contacts/sarah"
970        );
971        assert_eq!(
972            normalize_target(Path::new("./records/profiles/elena")),
973            "records/profiles/elena"
974        );
975        assert_eq!(normalize_target(Path::new("/records/x")), "records/x");
976        // Bare and `.md` forms must collapse to the same key, or edges won't unify.
977        assert_eq!(
978            normalize_target(Path::new("a/b")),
979            normalize_target(Path::new("a/b.md"))
980        );
981    }
982
983    // ── extract_link_targets (forwardlinks core) ────────────────────────────
984
985    #[test]
986    fn extract_handles_display_text_and_md_suffix() {
987        let body = "See [[records/profiles/sarah-chen|Sarah]] and [[records/contacts/elena.md]].";
988        let got = extract_link_targets(body);
989        assert_eq!(
990            got,
991            vec!["records/profiles/sarah-chen", "records/contacts/elena"]
992        );
993    }
994
995    #[test]
996    fn extract_ignores_external_markdown_links() {
997        // Standard markdown links are NOT wiki-links and must not be extracted
998        // (SPEC: external refs don't participate in the graph).
999        let body = "[Acme](https://acme.io) but [[records/companies/acme]] is internal.";
1000        let got = extract_link_targets(body);
1001        assert_eq!(got, vec!["records/companies/acme"]);
1002    }
1003
1004    #[test]
1005    fn extract_display_text_is_not_treated_as_a_target() {
1006        // A `|display` segment that looks path-like must not become a target;
1007        // only the part before `|` is the link target.
1008        let body = "[[records/contacts/sarah|sources/emails/decoy]]";
1009        let got = extract_link_targets(body);
1010        assert_eq!(got, vec!["records/contacts/sarah"]);
1011    }
1012
1013    // ── rewrite_links_to (write-side twin of backlinks) ─────────────────────
1014
1015    #[test]
1016    fn rewrite_plain_link_to_canonical_new_target() {
1017        let got = rewrite_links_to(
1018            "See [[records/contacts/sarah-chen]] today.",
1019            Path::new("records/contacts/sarah-chen"),
1020            Path::new("records/contacts/sarah-chen-acme"),
1021        );
1022        assert_eq!(got, "See [[records/contacts/sarah-chen-acme]] today.");
1023    }
1024
1025    #[test]
1026    fn rewrite_preserves_display_override() {
1027        let got = rewrite_links_to(
1028            "With [[records/contacts/sarah-chen|Sarah]].",
1029            Path::new("records/contacts/sarah-chen"),
1030            Path::new("records/contacts/sarah-chen-acme"),
1031        );
1032        assert_eq!(got, "With [[records/contacts/sarah-chen-acme|Sarah]].");
1033    }
1034
1035    #[test]
1036    fn rewrite_matches_md_suffixed_old_and_emits_bare_new() {
1037        // The `.md` spelling of the old target must match (it normalizes to the
1038        // same key the read side uses), and the new target is emitted bare —
1039        // the writer doctrine validate enforces (`WIKI_LINK_HAS_EXTENSION`).
1040        let got = rewrite_links_to(
1041            "[[records/contacts/sarah-chen.md]]",
1042            Path::new("records/contacts/sarah-chen"),
1043            Path::new("records/contacts/new.md"),
1044        );
1045        assert_eq!(got, "[[records/contacts/new]]");
1046    }
1047
1048    #[test]
1049    fn rewrite_leaves_prefix_collisions_and_short_form_untouched() {
1050        // Boundary correctness, anchored to the SAME normalize_target the read
1051        // side keys on: `records/contacts/sarah-chen` must NOT match the longer
1052        // `[[…-jr]]`, the short-form `[[sarah-chen]]`, or an unrelated target.
1053        let input = "[[records/contacts/sarah-chen-jr]] [[sarah-chen]] [[records/concepts/x]]";
1054        let got = rewrite_links_to(
1055            input,
1056            Path::new("records/contacts/sarah-chen"),
1057            Path::new("records/contacts/new"),
1058        );
1059        assert_eq!(got, input, "no genuine edge to the seed → text unchanged");
1060    }
1061
1062    #[test]
1063    fn rewrite_handles_multiple_occurrences_and_mixed_spellings() {
1064        let got = rewrite_links_to(
1065            "[[records/x]] then [[./records/x]] and [[records/x.md|d]] end",
1066            Path::new("records/x"),
1067            Path::new("records/y"),
1068        );
1069        // All three spellings of the same target retarget; the display survives.
1070        assert_eq!(
1071            got,
1072            "[[records/y]] then [[records/y]] and [[records/y|d]] end"
1073        );
1074    }
1075
1076    #[test]
1077    fn rewrite_retargets_exactly_the_edges_the_core_parser_sees() {
1078        // The load-bearing property of moving the rewrite into core: the write
1079        // side must operate on EXACTLY the edge set the read side recognizes —
1080        // the same `extract_link_targets` / `normalize_target` grammar that
1081        // `forwardlinks` is built on. Anchor the test to that grammar (via
1082        // `forwardlinks` on a real file) rather than re-listing literals, so a
1083        // future divergence between the read parser and the write rewrite fails
1084        // here. (Coupled to `forwardlinks` — the single-file edge extractor —
1085        // not the multi-file `backlinks` traversal, so it tests the grammar, not
1086        // the walk.)
1087        let fx = Fixture::new();
1088        let body = "Met [[records/contacts/sarah.md|Sarah]] and not [[records/contacts/sarah-2]].";
1089        fx.write("records/profiles/bio.md", "profile", "bio", body);
1090
1091        // Read side: the parser sees two outgoing edges, both in canonical bare
1092        // form (the `.md` spelling collapsed). `sarah` is a real edge here.
1093        let edges = forwardlinks(&fx.store, &fx.p("records/profiles/bio.md")).unwrap();
1094        assert_eq!(
1095            paths(&edges),
1096            vec!["records/contacts/sarah", "records/contacts/sarah-2"],
1097            "fixture must contain exactly the two edges this test reasons about"
1098        );
1099
1100        // Write side: rewriting `sarah → sarah-chen` must retarget the edge the
1101        // parser recognized (matching the `.md` spelling), preserve the display,
1102        // and leave the unrelated `sarah-2` edge untouched.
1103        let got = rewrite_links_to(
1104            body,
1105            Path::new("records/contacts/sarah"),
1106            Path::new("records/contacts/sarah-chen"),
1107        );
1108        assert_eq!(
1109            got,
1110            "Met [[records/contacts/sarah-chen|Sarah]] and not [[records/contacts/sarah-2]]."
1111        );
1112
1113        // Cross-check through the parser: the rewritten text's edge set is the
1114        // original with `sarah` swapped for `sarah-chen` — proving the rewrite
1115        // moved exactly one edge, the one the read side keyed on.
1116        fx.write("records/profiles/bio.md", "profile", "bio", &got);
1117        let after = forwardlinks(&fx.store, &fx.p("records/profiles/bio.md")).unwrap();
1118        assert_eq!(
1119            paths(&after),
1120            vec!["records/contacts/sarah-2", "records/contacts/sarah-chen"],
1121            "after rewrite the parser must see the new target and not the old"
1122        );
1123    }
1124
1125    #[test]
1126    fn rewrite_empty_old_target_is_a_no_op() {
1127        // A degenerate `old` (normalizes to empty) must never rewrite anything,
1128        // mirroring backlinks' empty-target guard.
1129        let input = "[[records/x]] [[]] text";
1130        let got = rewrite_links_to(input, Path::new(""), Path::new("records/y"));
1131        assert_eq!(got, input);
1132    }
1133
1134    #[test]
1135    fn rewrite_no_match_returns_input_unchanged() {
1136        let input = "no links, [external](https://x), and [[records/concepts/y]]";
1137        let got = rewrite_links_to(input, Path::new("records/x"), Path::new("records/z"));
1138        assert_eq!(got, input);
1139    }
1140
1141    #[test]
1142    fn rewrite_does_not_corrupt_links_in_nested_or_long_run_fences() {
1143        // Regression for the naive `starts_with("```")/("~~~")` toggle in the
1144        // rewriter: a fenced example documenting wiki-link syntax must be copied
1145        // VERBATIM, never retargeted — matching validate's edge notion. The
1146        // standard nested-fence convention (a ````-run block wrapping a ```
1147        // example) used to flip the bool mid-block, so the example link was
1148        // rewritten (silent documentation corruption).
1149        let body = "\
1150Here is how to write a link:
1151
1152````
1153```
1154[[records/contacts/bob]]
1155```
1156still fenced [[records/contacts/bob]]
1157````
1158
1159Real link: [[records/contacts/bob]].
1160";
1161        let got = rewrite_links_to(
1162            body,
1163            Path::new("records/contacts/bob"),
1164            Path::new("records/contacts/robert"),
1165        );
1166        // The two fenced examples are untouched; only the real link retargets.
1167        let expected = "\
1168Here is how to write a link:
1169
1170````
1171```
1172[[records/contacts/bob]]
1173```
1174still fenced [[records/contacts/bob]]
1175````
1176
1177Real link: [[records/contacts/robert]].
1178";
1179        assert_eq!(
1180            got, expected,
1181            "fenced example links must survive a rename verbatim; only live edges retarget"
1182        );
1183    }
1184
1185    // ── forwardlinks ─────────────────────────────────────────────────────────
1186
1187    #[test]
1188    fn forwardlinks_returns_sorted_deduped_targets_excluding_self() {
1189        let fx = Fixture::new();
1190        fx.write(
1191            "records/projects/renewal.md",
1192            "synthesis",
1193            "Renewal project",
1194            "Links: [[records/contacts/sarah]] [[records/companies/acme]] [[records/contacts/sarah]] and itself [[records/projects/renewal]].",
1195        );
1196        // The targets need not exist on disk for forwardlinks (it reads the one
1197        // file only). Self-links are dropped; duplicates collapse; sorted asc.
1198        let got = forwardlinks(&fx.store, &fx.p("records/projects/renewal.md")).unwrap();
1199        assert_eq!(
1200            paths(&got),
1201            vec!["records/companies/acme", "records/contacts/sarah"]
1202        );
1203    }
1204
1205    #[test]
1206    fn forwardlinks_picks_up_wiki_links_in_frontmatter() {
1207        // SPEC: wiki-links appear in scalar + block-sequence frontmatter fields,
1208        // not just the body. forwardlinks must follow those edges too.
1209        let fx = Fixture::new();
1210        fx.write_raw(
1211            "records/meetings/m1.md",
1212            "---\ntype: meeting\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Renewal sync\ncompany: [[records/companies/acme]]\nattendees:\n  - [[records/contacts/sarah]]\n  - [[records/contacts/elena]]\n---\nNotes about [[records/projects/renewal]].\n",
1213        );
1214        let got = forwardlinks(&fx.store, &fx.p("records/meetings/m1.md")).unwrap();
1215        assert_eq!(
1216            paths(&got),
1217            vec![
1218                "records/companies/acme",
1219                "records/contacts/elena",
1220                "records/contacts/sarah",
1221                "records/projects/renewal",
1222            ]
1223        );
1224    }
1225
1226    #[test]
1227    fn forwardlinks_missing_file_is_empty_not_error() {
1228        let fx = Fixture::new();
1229        let got = forwardlinks(&fx.store, &fx.p("records/profiles/ghost.md")).unwrap();
1230        assert!(got.is_empty());
1231    }
1232
1233    #[test]
1234    fn forwardlinks_resolves_seed_given_without_md_extension() {
1235        let fx = Fixture::new();
1236        fx.write(
1237            "records/profiles/sarah.md",
1238            "profile",
1239            "Sarah bio",
1240            "Works at [[records/companies/acme]].",
1241        );
1242        // Seed passed in bare wiki-link form (no `.md`) must still resolve.
1243        let got = forwardlinks(&fx.store, &fx.p("records/profiles/sarah")).unwrap();
1244        assert_eq!(paths(&got), vec!["records/companies/acme"]);
1245    }
1246
1247    // ── backlinks ──────────────────────────────────────────────────────────
1248
1249    #[test]
1250    fn backlinks_finds_incoming_across_layers_and_link_forms() {
1251        let fx = Fixture::new();
1252        // Target.
1253        fx.write("records/contacts/sarah.md", "contact", "Sarah Chen", "");
1254        // Three different incoming-link spellings, all to the same target.
1255        fx.write(
1256            "records/profiles/sarah.md",
1257            "profile",
1258            "bio",
1259            "See [[records/contacts/sarah]].",
1260        );
1261        fx.write(
1262            "records/meetings/m1.md",
1263            "meeting",
1264            "Renewal call",
1265            "Attendee [[records/contacts/sarah|Sarah]].",
1266        );
1267        fx.write(
1268            "sources/emails/e1.md",
1269            "email",
1270            "Hi",
1271            "From [[records/contacts/sarah.md]] today.",
1272        );
1273        // A file that links to a DIFFERENT contact must not be a backlink.
1274        fx.write(
1275            "records/profiles/other.md",
1276            "profile",
1277            "x",
1278            "[[records/contacts/sarah-2]]",
1279        );
1280        fx.reindex();
1281
1282        // All three link forms ([[x]], [[x|d]], [[x.md]]) resolve to the same
1283        // target and are found; the linkers are returned in canonical bare form.
1284        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1285        assert_eq!(
1286            paths(&got),
1287            vec![
1288                "records/meetings/m1",
1289                "records/profiles/sarah",
1290                "sources/emails/e1",
1291            ]
1292        );
1293    }
1294
1295    #[test]
1296    fn backlinks_and_forwardlinks_round_trip_on_same_key() {
1297        // If A forwardlinks to B, then B backlinks to A — both expressed in the
1298        // identical bare key, so neighborhood can dedup across directions.
1299        let fx = Fixture::new();
1300        fx.write(
1301            "records/profiles/a.md",
1302            "profile",
1303            "A",
1304            "Knows [[records/profiles/b]].",
1305        );
1306        fx.write("records/profiles/b.md", "profile", "B", "");
1307        fx.reindex();
1308        let fwd = forwardlinks(&fx.store, &fx.p("records/profiles/a.md")).unwrap();
1309        let back = backlinks(&fx.store, &fx.p("records/profiles/b.md")).unwrap();
1310        assert_eq!(paths(&fwd), vec!["records/profiles/b"]);
1311        assert_eq!(paths(&back), vec!["records/profiles/a"]);
1312    }
1313
1314    #[test]
1315    fn backlinks_does_not_match_path_prefix_collisions() {
1316        let fx = Fixture::new();
1317        fx.write("records/contacts/sam.md", "contact", "Sam", "");
1318        // `sam-smith` shares the `sam` prefix; must NOT count as a backlink to `sam`.
1319        fx.write(
1320            "records/profiles/x.md",
1321            "profile",
1322            "x",
1323            "[[records/contacts/sam-smith]]",
1324        );
1325        // The genuine backlink.
1326        fx.write(
1327            "records/profiles/y.md",
1328            "profile",
1329            "y",
1330            "[[records/contacts/sam]]",
1331        );
1332        fx.reindex();
1333
1334        let got = backlinks(&fx.store, &fx.p("records/contacts/sam")).unwrap();
1335        assert_eq!(paths(&got), vec!["records/profiles/y"]);
1336    }
1337
1338    #[test]
1339    fn backlinks_excludes_self_reference() {
1340        let fx = Fixture::new();
1341        // A page that links to itself is not its own backlink.
1342        fx.write(
1343            "records/synthesis/overview.md",
1344            "synthesis",
1345            "Overview",
1346            "This page [[records/synthesis/overview]] references itself.",
1347        );
1348        fx.reindex();
1349        let got = backlinks(&fx.store, &fx.p("records/synthesis/overview.md")).unwrap();
1350        assert!(
1351            got.is_empty(),
1352            "self-link must not appear as a backlink, got {got:?}"
1353        );
1354    }
1355
1356    #[test]
1357    fn backlinks_empty_when_nobody_links() {
1358        let fx = Fixture::new();
1359        fx.write("records/contacts/lonely.md", "contact", "Lonely", "");
1360        fx.write(
1361            "records/profiles/unrelated.md",
1362            "profile",
1363            "x",
1364            "[[records/companies/acme]]",
1365        );
1366        fx.reindex();
1367        let got = backlinks(&fx.store, &fx.p("records/contacts/lonely.md")).unwrap();
1368        assert!(got.is_empty());
1369    }
1370
1371    #[test]
1372    fn backlinks_ignores_index_and_meta_files() {
1373        let fx = Fixture::new();
1374        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1375        // An index.md that lists the target must NOT be reported as a backlink
1376        // (indexes are catalog, not relationship edges).
1377        fx.write_raw(
1378            "records/contacts/index.md",
1379            "---\ntype: index\nscope: folder\nfolder: records/contacts\n---\n- [[records/contacts/sarah]] — Sarah\n",
1380        );
1381        fx.reindex();
1382        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1383        assert!(got.is_empty(), "index.md must be excluded, got {got:?}");
1384    }
1385
1386    #[test]
1387    fn backlinks_finds_body_only_edge_not_in_frontmatter_links_field() {
1388        // REGRESSION: the sidecar's `links` field carries only the file's
1389        // frontmatter `links:` list; it does NOT include wiki-links written in
1390        // the body or in other typed frontmatter fields. Answering backlinks
1391        // from `links[]` alone would silently miss this edge. The candidate set
1392        // is sidecar-bounded, but each candidate's edge is confirmed by parsing
1393        // the file (the same extraction forwardlinks uses), so a body-only link
1394        // must still register as a backlink.
1395        let fx = Fixture::new();
1396        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1397        // `meeting.md` links to sarah ONLY in its body — its frontmatter has no
1398        // `links:` field at all, so the sidecar record's `links` is empty.
1399        fx.write(
1400            "records/meetings/standup.md",
1401            "meeting",
1402            "Standup",
1403            "Discussed renewal with [[records/contacts/sarah]].",
1404        );
1405        fx.reindex();
1406
1407        // Guard the premise: the sidecar record really does carry an empty
1408        // `links` (so this test fails loudly if the index ever starts extracting
1409        // body links — at which point the backlink predicate could be revisited).
1410        let rec = fx
1411            .store
1412            .find_by_type("meeting")
1413            .unwrap()
1414            .into_iter()
1415            .find(|r| r.path == fx.p("records/meetings/standup.md"))
1416            .expect("meeting is catalogued in its sidecar");
1417        assert!(
1418            rec.links.is_empty(),
1419            "premise: the body link is NOT projected into the sidecar `links` field; got {:?}",
1420            rec.links
1421        );
1422
1423        // Yet backlinks still finds it — because it confirms via the file parse,
1424        // not via the sidecar `links` field.
1425        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1426        assert_eq!(
1427            paths(&got),
1428            vec!["records/meetings/standup"],
1429            "a body-only wiki-link must register as a backlink"
1430        );
1431    }
1432
1433    #[test]
1434    fn backlinks_finds_edge_in_typed_frontmatter_field() {
1435        // A wiki-link inside a *typed* frontmatter field (`company:`) is a real
1436        // edge forwardlinks follows, so backlinks must find it too — even though
1437        // the sidecar's `links` field (the `links:` key only) does not list it.
1438        let fx = Fixture::new();
1439        fx.write("records/companies/acme.md", "company", "Acme", "");
1440        fx.write_raw(
1441            "records/contacts/sarah.md",
1442            "---\ntype: contact\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Sarah\ncompany: [[records/companies/acme]]\n---\nBody with no links.\n",
1443        );
1444        fx.reindex();
1445        let got = backlinks(&fx.store, &fx.p("records/companies/acme.md")).unwrap();
1446        assert_eq!(
1447            paths(&got),
1448            vec!["records/contacts/sarah"],
1449            "a wiki-link in a typed frontmatter field is an incoming edge"
1450        );
1451    }
1452
1453    #[test]
1454    fn backlinks_unscoped_scans_the_tree_not_only_the_sidecar() {
1455        // REGRESSION (loop budget): an UNSCOPED `backlinks` must resolve incoming
1456        // edges with a SINGLE embedded-ripgrep pass over the tree
1457        // (`Store::find_links_to`), NOT by reading the sidecar candidate set and
1458        // then `read_to_string`-confirming each candidate (which re-opens every
1459        // content file → O(store); the documented >3x budget miss). A ripgrep
1460        // pass is the same scan engine `validate`/`rename`/`dbmd links` ride, and
1461        // the tree — not the sidecar — is its ground truth: a linker that is on
1462        // disk but absent from every sidecar (stale / never-built index) is still
1463        // found. We assert that behaviorally, which fails loudly if the unscoped
1464        // path ever reverts to the sidecar-bounded per-candidate confirm loop
1465        // (that loop would NOT find the unindexed linker).
1466        let fx = Fixture::new();
1467        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1468        fx.write(
1469            "records/profiles/indexed.md",
1470            "profile",
1471            "Indexed",
1472            "[[records/contacts/sarah]]",
1473        );
1474        fx.reindex(); // builds sidecars for sarah + the indexed linker
1475
1476        // Now drop a NEW linker on disk WITHOUT reindexing — it is on disk but in
1477        // no sidecar.
1478        fx.write(
1479            "records/profiles/unindexed.md",
1480            "profile",
1481            "Unindexed",
1482            "[[records/contacts/sarah]]",
1483        );
1484
1485        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1486        assert_eq!(
1487            paths(&got),
1488            vec!["records/profiles/indexed", "records/profiles/unindexed"],
1489            "unscoped backlinks ripgrep-scans the tree, so the on-disk-but-unindexed \
1490             linker is found too — not only the sidecar-catalogued one"
1491        );
1492    }
1493
1494    #[test]
1495    fn backlinks_scoped_candidates_come_from_the_sidecar_not_a_tree_walk() {
1496        // REGRESSION (scale contract): the SCOPED form (`--type` / `--in`) is the
1497        // I/O-scoped path — it enumerates candidates from the relevant type-folder
1498        // `index.jsonl` sidecars and parses only those, NOT a whole-tree walk.
1499        // That is what makes the scope an I/O scope, not just a result filter:
1500        // a linker that is on disk but ABSENT from the sidecar (stale / never-built
1501        // index) is NOT discovered by the scoped call (the sidecar bounds which
1502        // files are candidates). This is the loop-vs-walk distinction the SPEC
1503        // draws, and it is exactly the inverse of the unscoped tree scan above.
1504        let fx = Fixture::new();
1505        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1506        fx.write(
1507            "records/profiles/indexed.md",
1508            "profile",
1509            "Indexed",
1510            "[[records/contacts/sarah]]",
1511        );
1512        fx.reindex(); // builds sidecars for sarah + the indexed linker
1513
1514        // Drop a NEW profile linker on disk WITHOUT reindexing — on disk, in no
1515        // sidecar.
1516        fx.write(
1517            "records/profiles/unindexed.md",
1518            "profile",
1519            "Unindexed",
1520            "[[records/contacts/sarah]]",
1521        );
1522
1523        // Scoped to the `profile` type: the candidate set is the sidecar's, so
1524        // only the catalogued linker is found — the unindexed one is invisible.
1525        let only_profiles = vec!["profile".to_string()];
1526        let got = backlinks_filtered(
1527            &fx.store,
1528            &fx.p("records/contacts/sarah.md"),
1529            &only_profiles,
1530            None,
1531        )
1532        .unwrap();
1533        assert_eq!(
1534            paths(&got),
1535            vec!["records/profiles/indexed"],
1536            "scoped backlinks reads the sidecar candidate set; the on-disk-but-unindexed \
1537             linker is not tree-walked"
1538        );
1539    }
1540
1541    #[test]
1542    fn backlinks_filtered_type_scopes_the_candidate_set() {
1543        // `--type` narrows backlinks to linkers of that type. Two files link to
1544        // the target — one `meeting`, one `profile`; filtering to `meeting`
1545        // returns only the meeting.
1546        let fx = Fixture::new();
1547        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1548        fx.write(
1549            "records/meetings/m1.md",
1550            "meeting",
1551            "Call",
1552            "[[records/contacts/sarah]]",
1553        );
1554        fx.write(
1555            "records/profiles/bio.md",
1556            "profile",
1557            "Bio",
1558            "[[records/contacts/sarah]]",
1559        );
1560        fx.reindex();
1561
1562        let only_meetings = vec!["meeting".to_string()];
1563        let got = backlinks_filtered(
1564            &fx.store,
1565            &fx.p("records/contacts/sarah.md"),
1566            &only_meetings,
1567            None,
1568        )
1569        .unwrap();
1570        assert_eq!(
1571            paths(&got),
1572            vec!["records/meetings/m1"],
1573            "--type meeting must exclude the profile linker"
1574        );
1575
1576        // Unfiltered, both come back — proving the filter (not the data) dropped one.
1577        let all = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1578        assert_eq!(
1579            paths(&all),
1580            vec!["records/meetings/m1", "records/profiles/bio"]
1581        );
1582    }
1583
1584    #[test]
1585    fn backlinks_filtered_layer_scopes_the_candidate_set() {
1586        // `--in <layer>` narrows backlinks to linkers under that layer. The two
1587        // linkers live in different layers (a sources email and a records
1588        // meeting) so the scope genuinely separates them.
1589        let fx = Fixture::new();
1590        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1591        fx.write(
1592            "records/meetings/m1.md",
1593            "meeting",
1594            "Call",
1595            "[[records/contacts/sarah]]",
1596        );
1597        fx.write(
1598            "sources/emails/intro.md",
1599            "email",
1600            "Intro",
1601            "[[records/contacts/sarah]]",
1602        );
1603        fx.reindex();
1604
1605        let got = backlinks_filtered(
1606            &fx.store,
1607            &fx.p("records/contacts/sarah.md"),
1608            &[],
1609            Some(Layer::Sources),
1610        )
1611        .unwrap();
1612        assert_eq!(
1613            paths(&got),
1614            vec!["sources/emails/intro"],
1615            "--in sources must keep only the sources-layer linker"
1616        );
1617
1618        let records_only = backlinks_filtered(
1619            &fx.store,
1620            &fx.p("records/contacts/sarah.md"),
1621            &[],
1622            Some(Layer::Records),
1623        )
1624        .unwrap();
1625        assert_eq!(paths(&records_only), vec!["records/meetings/m1"]);
1626    }
1627
1628    #[test]
1629    fn backlinks_scoped_type_spans_all_topic_folders_in_its_layer() {
1630        // REGRESSION (finding #12): a `type` can legitimately span several folders
1631        // within one layer — a `profile` is filed under its canonical
1632        // `records/profiles/` folder, but an agent may also file a profile under
1633        // another `records/<folder>/` (the type, not the folder, is authoritative).
1634        // The scoped candidate set must read the whole `records/` layer and filter
1635        // by type, NOT just the canonical-guess folder `records/profiles/`. Before
1636        // the fix, `find_by_type("profile")` read ONLY `records/profiles/index.jsonl`
1637        // whenever that sidecar existed, silently dropping every profile linker
1638        // filed under any other folder — so `backlinks --type profile` under-reported
1639        // dependents (a wrong blast-radius check) the moment a `records/profiles/`
1640        // page also existed.
1641        //
1642        // The trigger needs BOTH: a populated `records/profiles/` (so its canonical
1643        // sidecar exists) AND a profile elsewhere in the layer that links the
1644        // target. The earlier
1645        // `backlinks_scoped_candidates_come_from_the_sidecar_not_a_tree_walk` test
1646        // masks this bug precisely because its fixture has no `records/profiles/`.
1647        let fx = Fixture::new();
1648        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1649        // A profile in the CANONICAL type folder, NOT linking the target — its
1650        // only purpose is to make `records/profiles/index.jsonl` exist on disk.
1651        fx.write(
1652            "records/profiles/glossary.md",
1653            "profile",
1654            "Glossary",
1655            "No link to sarah here.",
1656        );
1657        // A profile in a NON-canonical folder that DOES link the target.
1658        fx.write(
1659            "records/people/sarah.md",
1660            "profile",
1661            "Sarah bio",
1662            "Profile of [[records/contacts/sarah]].",
1663        );
1664        fx.reindex(); // builds records/profiles/index.jsonl AND records/people/index.jsonl
1665
1666        // Scoped to `profile`: the off-canonical linker MUST be found. Pre-fix,
1667        // the candidate set was only `records/profiles/`'s sidecar, so this was empty.
1668        let scoped = backlinks_filtered(
1669            &fx.store,
1670            &fx.p("records/contacts/sarah.md"),
1671            &["profile".to_string()],
1672            None,
1673        )
1674        .unwrap();
1675        assert_eq!(
1676            paths(&scoped),
1677            vec!["records/people/sarah"],
1678            "a profile filed outside records/profiles/ must still be a scoped backlink"
1679        );
1680
1681        // Cross-check: the unscoped path (ripgrep tree scan) finds the same single
1682        // linker, proving the scoped result is now complete — not over- or
1683        // under-counting — and that the data was real all along.
1684        let unscoped = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1685        assert_eq!(
1686            paths(&unscoped),
1687            vec!["records/people/sarah"],
1688            "scoped and unscoped backlinks must agree on the edge set"
1689        );
1690    }
1691
1692    // ── neighborhood ─────────────────────────────────────────────────────────
1693
1694    #[test]
1695    fn neighborhood_hops_zero_is_empty() {
1696        let fx = Fixture::new();
1697        fx.write(
1698            "records/profiles/a.md",
1699            "profile",
1700            "A",
1701            "[[records/profiles/b]]",
1702        );
1703        fx.write("records/profiles/b.md", "profile", "B", "");
1704        let slice = neighborhood(
1705            &fx.store,
1706            &fx.p("records/profiles/a.md"),
1707            0,
1708            &[],
1709            Direction::Both,
1710        )
1711        .unwrap();
1712        assert_eq!(slice.seed, fx.p("records/profiles/a"));
1713        assert!(slice.nodes.is_empty());
1714    }
1715
1716    #[test]
1717    fn neighborhood_outgoing_one_hop_reads_summary_and_type() {
1718        let fx = Fixture::new();
1719        fx.write(
1720            "records/profiles/a.md",
1721            "profile",
1722            "Person A",
1723            "Knows [[records/contacts/b]].",
1724        );
1725        fx.write("records/contacts/b.md", "contact", "Contact B summary", "");
1726        let slice = neighborhood(
1727            &fx.store,
1728            &fx.p("records/profiles/a.md"),
1729            1,
1730            &[],
1731            Direction::Outgoing,
1732        )
1733        .unwrap();
1734        assert_eq!(slice.nodes.len(), 1);
1735        let n = &slice.nodes[0];
1736        assert_eq!(n.path, fx.p("records/contacts/b"));
1737        assert_eq!(n.summary, "Contact B summary");
1738        assert_eq!(n.type_.as_deref(), Some("contact"));
1739        assert_eq!(n.hops, 1);
1740        assert_eq!(
1741            n.via,
1742            Some((fx.p("records/profiles/a"), Direction::Outgoing))
1743        );
1744    }
1745
1746    #[test]
1747    fn neighborhood_incoming_only_walks_backlinks() {
1748        let fx = Fixture::new();
1749        // a -> seed (incoming to seed). seed -> c (outgoing from seed).
1750        fx.write(
1751            "records/profiles/seed.md",
1752            "profile",
1753            "Seed",
1754            "Out to [[records/profiles/c]].",
1755        );
1756        fx.write(
1757            "records/profiles/a.md",
1758            "profile",
1759            "A",
1760            "In to [[records/profiles/seed]].",
1761        );
1762        fx.write("records/profiles/c.md", "profile", "C", "");
1763        fx.reindex();
1764        let slice = neighborhood(
1765            &fx.store,
1766            &fx.p("records/profiles/seed.md"),
1767            1,
1768            &[],
1769            Direction::Incoming,
1770        )
1771        .unwrap();
1772        // Incoming direction: only `a` (which links TO seed), not `c`.
1773        assert_eq!(
1774            paths(
1775                &slice
1776                    .nodes
1777                    .iter()
1778                    .map(|n| n.path.clone())
1779                    .collect::<Vec<_>>()
1780            ),
1781            vec!["records/profiles/a"]
1782        );
1783        assert_eq!(
1784            slice.nodes[0].via,
1785            Some((fx.p("records/profiles/seed"), Direction::Incoming))
1786        );
1787    }
1788
1789    #[test]
1790    fn neighborhood_bounded_bfs_respects_hop_limit_and_min_distance() {
1791        let fx = Fixture::new();
1792        // Chain a -> b -> c -> d, all outgoing.
1793        fx.write("records/c/a.md", "concept", "A", "[[records/c/b]]");
1794        fx.write("records/c/b.md", "concept", "B", "[[records/c/c]]");
1795        fx.write("records/c/c.md", "concept", "C", "[[records/c/d]]");
1796        fx.write("records/c/d.md", "concept", "D", "");
1797        let slice = neighborhood(
1798            &fx.store,
1799            &fx.p("records/c/a.md"),
1800            2,
1801            &[],
1802            Direction::Outgoing,
1803        )
1804        .unwrap();
1805        // 2 hops reaches b (1) and c (2), not d (3).
1806        let by_path: HashMap<String, u32> = slice
1807            .nodes
1808            .iter()
1809            .map(|n| (n.path.to_string_lossy().to_string(), n.hops))
1810            .collect();
1811        assert_eq!(by_path.get("records/c/b").copied(), Some(1));
1812        assert_eq!(by_path.get("records/c/c").copied(), Some(2));
1813        assert_eq!(by_path.get("records/c/d"), None);
1814        assert_eq!(slice.nodes.len(), 2);
1815    }
1816
1817    #[test]
1818    fn neighborhood_records_min_hops_on_diamond() {
1819        let fx = Fixture::new();
1820        // Diamond: a -> b, a -> c, b -> d, c -> d. d is reachable at hop 2 from
1821        // either branch; it must be recorded once, at hop 2.
1822        fx.write(
1823            "records/d/a.md",
1824            "concept",
1825            "A",
1826            "[[records/d/b]] [[records/d/c]]",
1827        );
1828        fx.write("records/d/b.md", "concept", "B", "[[records/d/d]]");
1829        fx.write("records/d/c.md", "concept", "C", "[[records/d/d]]");
1830        fx.write("records/d/d.md", "concept", "D", "");
1831        let slice = neighborhood(
1832            &fx.store,
1833            &fx.p("records/d/a.md"),
1834            3,
1835            &[],
1836            Direction::Outgoing,
1837        )
1838        .unwrap();
1839        let d_nodes: Vec<&ContextNode> = slice
1840            .nodes
1841            .iter()
1842            .filter(|n| n.path == fx.p("records/d/d"))
1843            .collect();
1844        assert_eq!(d_nodes.len(), 1, "d must appear exactly once");
1845        assert_eq!(d_nodes[0].hops, 2, "d's min distance from a is 2");
1846        // b and c at hop 1, d at hop 2 => 3 nodes total, no cycle blowup.
1847        assert_eq!(slice.nodes.len(), 3);
1848    }
1849
1850    #[test]
1851    fn neighborhood_type_filter_narrows_results_but_not_traversal() {
1852        let fx = Fixture::new();
1853        // seed -> contact -> meeting. Filtering to `meeting` must still reach
1854        // the meeting THROUGH the (excluded) contact at hop 2.
1855        fx.write(
1856            "records/profiles/seed.md",
1857            "profile",
1858            "Seed",
1859            "[[records/contacts/sarah]]",
1860        );
1861        fx.write(
1862            "records/contacts/sarah.md",
1863            "contact",
1864            "Sarah",
1865            "[[records/meetings/m1]]",
1866        );
1867        fx.write("records/meetings/m1.md", "meeting", "Renewal call", "");
1868        let only_meetings = vec!["meeting".to_string()];
1869        let slice = neighborhood(
1870            &fx.store,
1871            &fx.p("records/profiles/seed.md"),
1872            2,
1873            &only_meetings,
1874            Direction::Outgoing,
1875        )
1876        .unwrap();
1877        // Only the meeting is returned; the contact is traversed but filtered out.
1878        assert_eq!(slice.nodes.len(), 1);
1879        assert_eq!(slice.nodes[0].path, fx.p("records/meetings/m1"));
1880        assert_eq!(slice.nodes[0].type_.as_deref(), Some("meeting"));
1881        assert_eq!(slice.nodes[0].hops, 2);
1882    }
1883
1884    #[test]
1885    fn neighborhood_capped_bounds_traversal_not_just_output() {
1886        // REGRESSION (finding #16): `neighborhood` expands every reached node, and
1887        // each incoming-edge expansion is a full-store scan, so the per-node cost
1888        // is O(visited × store). The CLI's `--limit` was applied post-hoc as a
1889        // `.take(n)` on the RESULT, which caps printed nodes but NOT the traversal
1890        // — the scans still fire for every reachable node. `neighborhood_capped`
1891        // bounds the traversal itself: once `max_nodes` distinct nodes are
1892        // admitted, the BFS stops discovering (and therefore stops scanning).
1893        //
1894        // Structure proving traversal — not just output — is bounded:
1895        //   seed -> a, b, c   (hop 1, discovered in sorted order: a, b, c)
1896        //   a    -> deep      (hop 2, reachable ONLY by expanding `a`)
1897        // Cap at 2: admit `a` and `b`, stop before `c` and before any hop-2
1898        // expansion. `deep` is therefore unreachable. A post-hoc `.take(2)` would
1899        // have traversed the whole graph (reaching `deep`) and only then truncated
1900        // — so the absence of `deep` is observable proof the traversal stopped.
1901        let fx = Fixture::new();
1902        fx.write(
1903            "records/n/seed.md",
1904            "concept",
1905            "Seed",
1906            "[[records/n/a]] [[records/n/b]] [[records/n/c]]",
1907        );
1908        fx.write("records/n/a.md", "concept", "A", "[[records/n/deep]]");
1909        fx.write("records/n/b.md", "concept", "B", "");
1910        fx.write("records/n/c.md", "concept", "C", "");
1911        fx.write("records/n/deep.md", "concept", "Deep", "");
1912
1913        // Uncapped over 3 hops: all four reachable nodes appear (a, b, c at hop 1,
1914        // deep at hop 2) — the full set the cap is measured against.
1915        let full = neighborhood(
1916            &fx.store,
1917            &fx.p("records/n/seed.md"),
1918            3,
1919            &[],
1920            Direction::Outgoing,
1921        )
1922        .unwrap();
1923        assert_eq!(
1924            paths(
1925                &full
1926                    .nodes
1927                    .iter()
1928                    .map(|n| n.path.clone())
1929                    .collect::<Vec<_>>()
1930            ),
1931            vec![
1932                "records/n/a",
1933                "records/n/b",
1934                "records/n/c",
1935                "records/n/deep"
1936            ],
1937            "uncapped traversal reaches every node within the hop budget"
1938        );
1939
1940        // Capped at 2 over the SAME hop budget: exactly the first two hop-1 nodes,
1941        // and crucially NOT `deep` — the cap halted the BFS before any node was
1942        // expanded into hop 2, so the deep node was never traversed to.
1943        let capped = neighborhood_capped(
1944            &fx.store,
1945            &fx.p("records/n/seed.md"),
1946            3,
1947            &[],
1948            Direction::Outgoing,
1949            Some(2),
1950        )
1951        .unwrap();
1952        assert_eq!(
1953            paths(
1954                &capped
1955                    .nodes
1956                    .iter()
1957                    .map(|n| n.path.clone())
1958                    .collect::<Vec<_>>()
1959            ),
1960            vec!["records/n/a", "records/n/b"],
1961            "the cap bounds traversal: only the first 2 nodes are reached, and the \
1962             hop-2 `deep` node (reachable only by expanding a capped-out node) is \
1963             never traversed"
1964        );
1965
1966        // `max_nodes = None` is exactly the unbounded `neighborhood` behavior.
1967        let uncapped = neighborhood_capped(
1968            &fx.store,
1969            &fx.p("records/n/seed.md"),
1970            3,
1971            &[],
1972            Direction::Outgoing,
1973            None,
1974        )
1975        .unwrap();
1976        assert_eq!(
1977            uncapped.nodes.len(),
1978            full.nodes.len(),
1979            "None cap matches the unbounded neighborhood result"
1980        );
1981    }
1982
1983    #[test]
1984    fn neighborhood_capped_both_direction_caps_the_node_count() {
1985        // The CLI always passes `Direction::Both` (the per-node backlinks scan is
1986        // the expensive path the cap exists to bound). The cap gates discovery in
1987        // any direction, so a hub linked from many nodes is still bounded.
1988        let fx = Fixture::new();
1989        fx.write("records/profiles/hub.md", "profile", "Hub", "");
1990        for n in ["a", "b", "c", "d", "e"] {
1991            fx.write(
1992                &format!("records/profiles/{n}.md"),
1993                "profile",
1994                n,
1995                "[[records/profiles/hub]]",
1996            );
1997        }
1998        fx.reindex();
1999
2000        let capped = neighborhood_capped(
2001            &fx.store,
2002            &fx.p("records/profiles/hub.md"),
2003            1,
2004            &[],
2005            Direction::Both,
2006            Some(3),
2007        )
2008        .unwrap();
2009        assert_eq!(
2010            capped.nodes.len(),
2011            3,
2012            "Both-direction neighborhood is bounded to the node cap"
2013        );
2014
2015        // Without the cap the same call returns all five backlinking nodes,
2016        // proving the cap (not the data) limited the set.
2017        let uncapped = neighborhood(
2018            &fx.store,
2019            &fx.p("records/profiles/hub.md"),
2020            1,
2021            &[],
2022            Direction::Both,
2023        )
2024        .unwrap();
2025        assert_eq!(uncapped.nodes.len(), 5);
2026    }
2027
2028    #[test]
2029    fn neighborhood_cycle_terminates() {
2030        let fx = Fixture::new();
2031        // a <-> b cycle. Must not loop forever; each appears once.
2032        fx.write("records/g/a.md", "concept", "A", "[[records/g/b]]");
2033        fx.write("records/g/b.md", "concept", "B", "[[records/g/a]]");
2034        fx.reindex();
2035        let slice =
2036            neighborhood(&fx.store, &fx.p("records/g/a.md"), 10, &[], Direction::Both).unwrap();
2037        // From a: b is the only other node (a is the seed, excluded).
2038        assert_eq!(
2039            paths(
2040                &slice
2041                    .nodes
2042                    .iter()
2043                    .map(|n| n.path.clone())
2044                    .collect::<Vec<_>>()
2045            ),
2046            vec!["records/g/b"]
2047        );
2048    }
2049
2050    // ── orphans ──────────────────────────────────────────────────────────────
2051
2052    #[test]
2053    fn orphans_finds_files_with_no_edges_either_direction() {
2054        let fx = Fixture::new();
2055        // Wired pair: a links to b (a has outgoing, b has incoming).
2056        fx.write(
2057            "records/profiles/a.md",
2058            "profile",
2059            "A",
2060            "[[records/profiles/b]]",
2061        );
2062        fx.write("records/profiles/b.md", "profile", "B", "");
2063        // Orphan: no links in or out.
2064        fx.write(
2065            "sources/emails/lonely.md",
2066            "email",
2067            "Lonely email",
2068            "Just text, no links.",
2069        );
2070        let got = orphans(&fx.store, None).unwrap();
2071        assert_eq!(paths(&got), vec!["sources/emails/lonely.md"]);
2072    }
2073
2074    #[test]
2075    fn orphans_file_with_only_broken_outgoing_link_is_orphan() {
2076        let fx = Fixture::new();
2077        // Broken targets are validation issues, not graph edges to another
2078        // store file. A file whose only link points nowhere is still an orphan.
2079        fx.write(
2080            "records/profiles/a.md",
2081            "profile",
2082            "A",
2083            "[[records/contacts/ghost]]",
2084        );
2085        let got = orphans(&fx.store, None).unwrap();
2086        assert!(
2087            paths(&got).contains(&"records/profiles/a.md".to_string()),
2088            "broken outgoing links must not wire the graph: {got:?}"
2089        );
2090    }
2091
2092    #[test]
2093    fn orphans_file_with_only_incoming_is_not_orphan() {
2094        let fx = Fixture::new();
2095        // `target` has no outgoing links but IS linked to by `linker` — not an orphan.
2096        fx.write("records/contacts/target.md", "contact", "Target", "");
2097        fx.write(
2098            "records/profiles/linker.md",
2099            "profile",
2100            "Linker",
2101            "[[records/contacts/target]]",
2102        );
2103        let got = orphans(&fx.store, None).unwrap();
2104        assert!(
2105            !paths(&got).contains(&"records/contacts/target.md".to_string()),
2106            "incoming-only is not an orphan: {got:?}"
2107        );
2108        // `linker` has outgoing, so also not an orphan.
2109        assert!(!paths(&got).contains(&"records/profiles/linker.md".to_string()));
2110    }
2111
2112    #[test]
2113    fn orphans_incoming_link_from_other_layer_unorphans() {
2114        let fx = Fixture::new();
2115        // Candidate in records/, only incoming edge comes from sources/ — a
2116        // cross-layer link must still un-orphan it even when scoped to records.
2117        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
2118        fx.write(
2119            "sources/emails/sarah.md",
2120            "email",
2121            "bio",
2122            "[[records/contacts/sarah]]",
2123        );
2124        // A genuine orphan in records/ to prove the scope still returns something.
2125        fx.write("records/contacts/nemo.md", "contact", "Nemo", "");
2126        let got = orphans(&fx.store, Some(Layer::Records)).unwrap();
2127        assert_eq!(paths(&got), vec!["records/contacts/nemo.md"]);
2128    }
2129
2130    #[test]
2131    fn orphans_layer_scope_filters_candidates() {
2132        let fx = Fixture::new();
2133        // Orphans across both layers: one source, and two records (an atomic
2134        // contact + a conclusion `profile`, the former wiki-page).
2135        fx.write("sources/emails/s.md", "email", "S", "no links");
2136        fx.write("records/contacts/r.md", "contact", "R", "");
2137        fx.write("records/profiles/w.md", "profile", "W", "");
2138        // The records scope keeps only the two records-layer orphans.
2139        let only_records = orphans(&fx.store, Some(Layer::Records)).unwrap();
2140        assert_eq!(
2141            paths(&only_records),
2142            vec!["records/contacts/r.md", "records/profiles/w.md"]
2143        );
2144        let only_sources = orphans(&fx.store, Some(Layer::Sources)).unwrap();
2145        assert_eq!(paths(&only_sources), vec!["sources/emails/s.md"]);
2146        // No scope: all three, sorted (records, records, sources).
2147        let all = orphans(&fx.store, None).unwrap();
2148        assert_eq!(
2149            paths(&all),
2150            vec![
2151                "records/contacts/r.md",
2152                "records/profiles/w.md",
2153                "sources/emails/s.md",
2154            ]
2155        );
2156    }
2157
2158    #[test]
2159    fn orphans_self_link_does_not_count_as_an_edge() {
2160        let fx = Fixture::new();
2161        // A page that only links to itself has no real edges => still an orphan.
2162        fx.write(
2163            "records/synthesis/solo.md",
2164            "synthesis",
2165            "Solo",
2166            "I reference [[records/synthesis/solo]] only.",
2167        );
2168        let got = orphans(&fx.store, None).unwrap();
2169        assert_eq!(paths(&got), vec!["records/synthesis/solo.md"]);
2170    }
2171
2172    #[test]
2173    fn orphans_excludes_index_and_db_files() {
2174        let fx = Fixture::new();
2175        // A lone index.md / DB.md must never be reported as an orphan content file.
2176        fx.write_raw(
2177            "records/index.md",
2178            "---\ntype: index\nscope: layer\nfolder: records\n---\n# records\n",
2179        );
2180        fx.write(
2181            "records/profiles/real-orphan.md",
2182            "profile",
2183            "Real",
2184            "no links",
2185        );
2186        let got = orphans(&fx.store, None).unwrap();
2187        assert_eq!(paths(&got), vec!["records/profiles/real-orphan.md"]);
2188    }
2189
2190    // ── frontmatter_block helper ─────────────────────────────────────────────
2191
2192    #[test]
2193    fn frontmatter_block_extracts_between_fences() {
2194        let text = "---\ntype: contact\nsummary: hi\n---\nbody here\n";
2195        assert_eq!(
2196            frontmatter_block(text),
2197            Some("type: contact\nsummary: hi\n")
2198        );
2199    }
2200
2201    #[test]
2202    fn frontmatter_block_none_without_leading_fence() {
2203        let text = "no frontmatter here\n";
2204        assert_eq!(frontmatter_block(text), None);
2205    }
2206
2207    #[test]
2208    fn frontmatter_block_tolerates_leading_bom() {
2209        // Regression (finding #19 cross-module): a UTF-8 BOM before the opening
2210        // fence must not hide the frontmatter from the graph layer — otherwise a
2211        // BOM-prefixed file the catalog indexes contributes no backlinks/edges.
2212        // Pre-fix the `---\n` strip failed on the BOM and returned None.
2213        let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody here\n";
2214        assert_eq!(
2215            frontmatter_block(text),
2216            Some("type: contact\nsummary: hi\n"),
2217            "a leading BOM must not hide frontmatter from the graph layer"
2218        );
2219    }
2220
2221    // ── shared edge notion: whitespace / fence / case / containment ──────────
2222
2223    /// Padded `[[ x ]]` must be a forward edge AND (after reindex) a backward
2224    /// edge — the two views agreeing on the same edge in a clean store.
2225    #[test]
2226    fn padded_link_is_both_a_forward_and_backward_edge() {
2227        let fx = Fixture::new();
2228        fx.write(
2229            "records/contacts/sarah.md",
2230            "contact",
2231            "Sarah",
2232            "the contact",
2233        );
2234        fx.write(
2235            "records/profiles/a.md",
2236            "profile",
2237            "A",
2238            "See [[ records/contacts/sarah ]] today.",
2239        );
2240        fx.reindex();
2241
2242        assert_eq!(
2243            paths(&forwardlinks(&fx.store, Path::new("records/profiles/a.md")).unwrap()),
2244            vec!["records/contacts/sarah"],
2245            "padded link is a forward edge"
2246        );
2247        assert_eq!(
2248            paths(&backlinks(&fx.store, Path::new("records/contacts/sarah.md")).unwrap()),
2249            vec!["records/profiles/a"],
2250            "padded link is the SAME backward edge (forward and backward agree)"
2251        );
2252    }
2253
2254    /// A `[[...]]` only inside a fenced code block is a documentation example,
2255    /// not an edge: no forward edge, no backward edge, and the source page is an
2256    /// orphan (no real links). Matches validate's fence-aware extractor.
2257    #[test]
2258    fn fenced_link_is_not_an_edge_and_page_is_orphan() {
2259        let fx = Fixture::new();
2260        fx.write(
2261            "records/contacts/sarah.md",
2262            "contact",
2263            "Sarah",
2264            "the contact",
2265        );
2266        fx.write(
2267            "records/synthesis/howto.md",
2268            "synthesis",
2269            "Howto",
2270            "```markdown\n[[records/contacts/sarah]] is how you link.\n```",
2271        );
2272        fx.reindex();
2273
2274        assert!(
2275            forwardlinks(&fx.store, Path::new("records/synthesis/howto.md"))
2276                .unwrap()
2277                .is_empty(),
2278            "a fenced example is not a forward edge"
2279        );
2280        assert!(
2281            backlinks(&fx.store, Path::new("records/contacts/sarah.md"))
2282                .unwrap()
2283                .is_empty(),
2284            "a fenced example is not a backward edge"
2285        );
2286        let orphan_set = paths(&orphans(&fx.store, None).unwrap());
2287        assert!(
2288            orphan_set.contains(&"records/synthesis/howto.md".to_string()),
2289            "a page whose only link is fenced has no real edges => orphan: {orphan_set:?}"
2290        );
2291    }
2292
2293    /// `rename` must NOT rewrite a `[[...]]` inside a fenced code block (it is
2294    /// verbatim documentation, not an edge), while still rewriting a real link.
2295    #[test]
2296    fn rewrite_links_to_leaves_fenced_examples_untouched() {
2297        let input = "\
2298Real [[records/contacts/sarah]] link.
2299
2300```markdown
2301Example: [[records/contacts/sarah]] inside a fence.
2302```
2303
2304Trailing [[records/contacts/sarah]].
2305";
2306        let got = rewrite_links_to(
2307            input,
2308            Path::new("records/contacts/sarah"),
2309            Path::new("records/contacts/sarah-chen"),
2310        );
2311        // The two non-fenced links retarget; the fenced one is verbatim.
2312        assert!(
2313            got.contains("Real [[records/contacts/sarah-chen]] link."),
2314            "real link before the fence must retarget"
2315        );
2316        assert!(
2317            got.contains("Trailing [[records/contacts/sarah-chen]]."),
2318            "real link after the fence must retarget"
2319        );
2320        assert!(
2321            got.contains("Example: [[records/contacts/sarah]] inside a fence."),
2322            "fenced example must stay verbatim, got:\n{got}"
2323        );
2324    }
2325
2326    /// `rewrite_links_to` matches a padded link and preserves the display.
2327    #[test]
2328    fn rewrite_links_to_matches_padded_link() {
2329        let got = rewrite_links_to(
2330            "See [[ records/contacts/sarah |Sarah]] today.",
2331            Path::new("records/contacts/sarah"),
2332            Path::new("records/contacts/sarah-chen"),
2333        );
2334        assert_eq!(got, "See [[records/contacts/sarah-chen|Sarah]] today.");
2335    }
2336
2337    /// On a case-insensitive filesystem a case-variant link is the same edge:
2338    /// backlinks finds it, orphans does NOT falsely orphan the target, and
2339    /// rename rewrites it. On a case-sensitive FS the link is genuinely a
2340    /// different target, so the test is skipped.
2341    #[cfg(unix)]
2342    #[test]
2343    fn case_variant_link_is_one_edge_on_case_insensitive_fs() {
2344        // Probe the filesystem the same way the production code does
2345        // (`link_edge_key` is imported at module scope).
2346        if link_edge_key("A") != link_edge_key("a") {
2347            // case-sensitive filesystem: the case-variant link is a different
2348            // target, so this scenario doesn't apply.
2349            return;
2350        }
2351        let fx = Fixture::new();
2352        fx.write(
2353            "records/contacts/sarah-chen.md",
2354            "contact",
2355            "Sarah",
2356            "the contact",
2357        );
2358        fx.write(
2359            "records/profiles/bio.md",
2360            "profile",
2361            "Bio",
2362            "See [[records/contacts/Sarah-Chen]].",
2363        );
2364        fx.reindex();
2365
2366        assert_eq!(
2367            paths(&backlinks(&fx.store, Path::new("records/contacts/sarah-chen.md")).unwrap()),
2368            vec!["records/profiles/bio"],
2369            "case-variant incoming link must be a backward edge"
2370        );
2371        let orphan_set = paths(&orphans(&fx.store, None).unwrap());
2372        assert!(
2373            !orphan_set.contains(&"records/contacts/sarah-chen.md".to_string()),
2374            "a target with a live case-variant incoming link must NOT be orphaned: {orphan_set:?}"
2375        );
2376
2377        let rewritten = rewrite_links_to(
2378            "See [[records/contacts/Sarah-Chen]].",
2379            Path::new("records/contacts/sarah-chen"),
2380            Path::new("records/contacts/sarah"),
2381        );
2382        assert_eq!(
2383            rewritten, "See [[records/contacts/sarah]].",
2384            "rename must rewrite the case-variant link on a case-insensitive FS"
2385        );
2386    }
2387
2388    /// A `[[../outside/x]]` escaping wiki-link is never a forward edge, and a
2389    /// `neighborhood` from the escaping page never reads or traverses through the
2390    /// external file — closing the disclosure vector.
2391    #[cfg(unix)]
2392    #[test]
2393    fn escaping_link_is_not_an_edge_and_neighborhood_does_not_escape() {
2394        let fx = Fixture::new();
2395        // An external file OUTSIDE the store root, with its own in-store link.
2396        let outside_dir = fx.store.root.parent().unwrap().join("outside");
2397        fs::create_dir_all(&outside_dir).unwrap();
2398        fs::write(
2399            outside_dir.join("secret.md"),
2400            "---\ntype: note\nsummary: TOPSECRET\n---\nLinks [[records/contacts/sarah]].\n",
2401        )
2402        .unwrap();
2403        fx.write(
2404            "records/contacts/sarah.md",
2405            "contact",
2406            "Sarah",
2407            "the contact",
2408        );
2409        fx.write(
2410            "records/concepts/traversal.md",
2411            "concept",
2412            "Traversal",
2413            "See [[../outside/secret]].",
2414        );
2415        fx.reindex();
2416
2417        // The escaping target is not a forward edge.
2418        assert!(
2419            forwardlinks(&fx.store, Path::new("records/concepts/traversal.md"))
2420                .unwrap()
2421                .is_empty(),
2422            "an escaping `[[../outside/secret]]` must not be a forward edge"
2423        );
2424
2425        // Neighborhood from the escaping page reaches nothing through the
2426        // external file (the external file is never read/traversed).
2427        let slice = neighborhood(
2428            &fx.store,
2429            Path::new("records/concepts/traversal.md"),
2430            2,
2431            &[],
2432            Direction::Outgoing,
2433        )
2434        .unwrap();
2435        assert!(
2436            slice
2437                .nodes
2438                .iter()
2439                .all(|n| !n.path.to_string_lossy().contains("outside")),
2440            "neighborhood must not read/traverse the external file: {:?}",
2441            slice.nodes
2442        );
2443    }
2444}