Skip to main content

dbmd_core/
graph.rs

1//! `graph` — the wiki-link **relationship layer**.
2//!
3//! Wiki-links are curated-relevance edges (the LLM wrote them), so the graph's
4//! job is to **assemble the relevant context around a seed**, not to be
5//! analyzed. **All ops are on-demand — there is no maintained graph** (a
6//! persistent graph is the roadmap engine).
7//!
8//! [`backlinks`] / [`forwardlinks`] are loop ops (O(changed), never O(store)).
9//! [`neighborhood`] is the high-value context-hydration op. [`orphans`] is a
10//! SWEEP curation worklist.
11//!
12//! Whole-graph analytics (connected components, cycle detection, shortest
13//! path, sinks/sources, DOT/JSON export) are deliberately **not** here — a
14//! human studying the graph opens the store in Obsidian; broken-link detection
15//! is [`crate::validate`]'s job (`WIKI_LINK_BROKEN`).
16//!
17//! ## Implementation note — two paths for the incoming-edge scan
18//!
19//! The scale contract (SPEC § Tooling, plan: *"the interactive loop is
20//! O(changed), never O(store)"*) is the load-bearing rule here. [`backlinks`]
21//! is a loop op, so it must **not** open and `read_to_string` every content file
22//! in the store on each call. It resolves incoming edges by one of two paths,
23//! chosen by whether the call is scoped:
24//!
25//! - **Unscoped** (`dbmd graph backlinks <x>`, no `--type`/`--in`): one
26//!   embedded-ripgrep pass for the literal `[[<target>]]` over the tree, via
27//!   [`Store::find_links_to`] (`grep` + `ignore`, early-exit per file) — the
28//!   same scan engine [`crate::validate`]'s working-set incoming-linker step
29//!   uses. A single store traversal with cheap presence-only matching, not N
30//!   whole-file parses; that is what keeps the unscoped call inside the loop
31//!   budget. [`backlinks`] then filters the raw hits to content files and emits
32//!   canonical bare targets (its relationship view), where the lower-level
33//!   [`Store::find_links_to`] returns every `.md` the text appears in.
34//! - **Scoped** (`--type` / `--in`): the candidate set is enumerated from the
35//!   relevant layer's `index.jsonl` sidecars — the sidecars of the one layer the
36//!   `--type` belongs to (via [`Store::sidecar_records`]), filtered to that type
37//!   — and each candidate is confirmed by a single-file parse. That is what makes
38//!   `--type` / `--in` an *I/O* scope, not just a result filter: a typed/layer-scoped
39//!   `backlinks` reads only the relevant layer's sidecars (O(entities-in-layer))
40//!   and parses only those files. A type's records can span several folders within
41//!   its layer (a `profile` filed under any `records/<folder>/`, not only its
42//!   canonical `records/profiles/`), so the read is layer-wide, not a single
43//!   canonical folder — otherwise off-canonical-folder linkers would be silently
44//!   dropped.
45//!
46//! **Why the scoped path confirms by parsing the candidate, not by trusting the
47//! sidecar's `links` field.** A sidecar record's `links` is the file's
48//! *frontmatter* `links:` list only — it does **not** capture wiki-links written
49//! in the body or inside other typed frontmatter fields (`company: [[…]]`,
50//! `attendees: [ … ]`, `derived_from: [ … ]`). [`forwardlinks`] extracts edges
51//! from the whole file, so to keep the two directions on the **same** edge set
52//! (an incoming edge to X is exactly: some file whose [`forwardlinks`] contains
53//! X) the incoming-edge confirmation re-parses each candidate file the same way.
54//! The sidecar bounds *which* files are candidates; the parse decides whether
55//! each truly links. The unscoped ripgrep path stays on that same edge set by
56//! matching the link text wherever it lives in the file (frontmatter or body).
57//! A node's `summary` / `type` likewise read frontmatter directly (the source of
58//! truth the sidecar is derived from; never stale).
59
60use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
61use std::path::{Path, PathBuf};
62
63use ignore::WalkBuilder;
64
65use crate::index::IndexRecord;
66use crate::store::{
67    canonical_link_target, ensure_path_within_store, extract_edge_targets, fence_closes,
68    fence_opens, layer_for_type, link_edge_key, Layer, Store, StoreError,
69};
70
/// The edge orientation(s) a graph traversal is allowed to follow.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Direction {
    /// Follow only edges pointing *at* the current node (its backlinks).
    Incoming,
    /// Follow only edges leaving the current node (its forwardlinks).
    Outgoing,
    /// Follow incoming and outgoing edges alike.
    Both,
}
81
82/// One node reached during a [`neighborhood`] hydration: the file, its
83/// `summary`, and how it connects back toward the seed.
84#[derive(Debug, Clone, PartialEq, Eq)]
85pub struct ContextNode {
86    /// The store-relative path of the reached file.
87    pub path: PathBuf,
88    /// The file's `summary` (read from its sidecar entry / frontmatter).
89    pub summary: String,
90    /// The file's `type`, when known.
91    pub type_: Option<String>,
92    /// Hop distance from the seed (the seed itself is 0).
93    pub hops: u32,
94    /// The relationship edge that brought this node into the slice: the path it
95    /// links to/from one hop closer to the seed, and the direction.
96    pub via: Option<(PathBuf, Direction)>,
97}
98
99/// The readable working-set digest [`neighborhood`] returns: the seed plus the
100/// reached nodes with their summaries and connections. The relationship-axis
101/// "turn a seed into context" primitive.
102#[derive(Debug, Clone, PartialEq, Eq)]
103pub struct ContextSlice {
104    /// The seed the slice was hydrated from.
105    pub seed: PathBuf,
106    /// The reached nodes (excluding the seed), in BFS order.
107    pub nodes: Vec<ContextNode>,
108}
109
110/// Incoming edges to `path`: files that wiki-link to it. The blast-radius /
111/// dependents primitive before an edit. Store-wide (every layer / every type);
112/// see [`backlinks_filtered`] for the `--type` / `--in`-scoped form.
113///
114/// `path` is the store-relative target as it would be written inside a
115/// wiki-link (with or without a trailing `.md`; both resolve to the same
116/// target). Returns each linking file as its **canonical bare wiki-link path**
117/// (store-relative, no `.md`) — the same key [`forwardlinks`] emits, so the two
118/// directions round-trip and [`neighborhood`] can use one node identity.
119/// Deduped, sorted, never including the seed itself.
120pub fn backlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
121    backlinks_filtered(store, path, &[], None)
122}
123
124/// Incoming edges to `path`, scoped by the linking file's `type` and/or layer —
125/// the `dbmd graph backlinks --type/--in` surface.
126///
127/// **Scale (the loop contract).** Two paths, by whether the call is scoped:
128///
129/// - **Unscoped** (`types` empty *and* `layer` `None`): one embedded-ripgrep
130///   pass for `[[<target>]]` across the store via [`Store::find_links_to`] — a
131///   single `grep` + `ignore` traversal with early-exit per file, never a
132///   `read_to_string` of every content file. This is the same scan engine
133///   [`crate::validate::validate_working_set`]'s incoming-linker step rides, and
134///   it keeps the unscoped call inside the loop budget (the old per-candidate
135///   confirm-read re-opened every file in the store → O(store)).
136/// - **Scoped** (`types` and/or `layer` set): the candidate set — the files that
137///   *might* link to `path` — is read from the relevant layer's `index.jsonl`
138///   sidecars, so the call touches only the named layer(s): O(entities-in-layer),
139///   the sanctioned loop cost. Each candidate is then confirmed by a single-file
140///   parse. When `types` lists several types, the sidecars of each type's layer
141///   are read and the candidate sets unioned (filtered to the type), so a type
142///   whose records span multiple folders within its layer (e.g. a `profile` filed
143///   under any `records/<folder>/`) is fully covered; a `layer` further restricts
144///   the candidate paths to that layer.
145///
146/// **Correctness (one edge set, both paths).** An incoming edge to X is exactly:
147/// some file whose [`forwardlinks`] contains X — a wiki-link in the body or in
148/// *any* frontmatter field (`company: [[…]]`, `attendees: [ … ]`), not just the
149/// sidecar's frontmatter `links:` projection. Both paths honor that:
150/// - The unscoped scan matches the literal `[[<target>]]` text wherever it lives
151///   in a file (frontmatter or body), the same edges [`forwardlinks`] extracts.
152///   [`Store::find_links_to`] returns *every* `.md` carrying the link text
153///   (including `index.md` catalogs); [`backlinks`] is the relationship view, so
154///   the results are filtered to content files ([`is_content_rel`]) and emitted
155///   as canonical bare targets, self-excluded.
156/// - The scoped path confirms each candidate via [`file_links_to`], which
157///   delegates to [`forwardlinks`] (body + every frontmatter field) — so a
158///   body-only or typed-field edge is caught, not just the sidecar's `links:`
159///   list.
160///
161/// Result form (canonical bare paths, deduped, sorted, seed excluded) is
162/// identical on both paths and matches [`backlinks`].
163pub fn backlinks_filtered(
164    store: &Store,
165    path: &Path,
166    types: &[String],
167    layer: Option<Layer>,
168) -> Result<Vec<PathBuf>, StoreError> {
169    let target = normalize_target(path);
170    if target.is_empty() {
171        return Ok(Vec::new());
172    }
173    let target_key = edge_key(&target);
174
175    // Unscoped: one content pass over the store (O(store) scan with early-exit
176    // per file), not a per-candidate read of every content file. `find_links_to`
177    // returns every `.md` carrying an edge to the target (incl. catalog
178    // `index.md`); narrow to content files and canonicalize to the bare target
179    // form `backlinks` emits, dropping the seed's self-link.
180    if types.is_empty() && layer.is_none() {
181        let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
182        for rel in store.find_links_to(path)? {
183            if !is_content_rel(&rel) {
184                continue;
185            }
186            let linker = normalize_target(&rel);
187            if linker.is_empty() || edge_key(&linker) == target_key {
188                // A file never counts as its own backlink (case-folded so a
189                // case-variant self-link is still excluded).
190                continue;
191            }
192            hits.insert(PathBuf::from(linker));
193        }
194        return Ok(hits.into_iter().collect());
195    }
196
197    // Scoped: read only the named folder(s)' sidecars for the candidate set, then
198    // confirm each candidate with a single-file parse — O(folder), the I/O scope
199    // `--type` / `--in` buys.
200    let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
201    for candidate in candidate_records(store, types, layer)? {
202        let rel = &candidate.path;
203        let candidate_target = normalize_target(rel);
204        if candidate_target.is_empty() || edge_key(&candidate_target) == target_key {
205            // A file never counts as its own backlink.
206            continue;
207        }
208        // Confirm the edge by parsing the candidate file the same way
209        // forwardlinks does (body + all frontmatter), so body/typed-field links
210        // are caught — the sidecar's `links` field alone would miss them.
211        if file_links_to(store, rel, &target)? {
212            hits.insert(PathBuf::from(candidate_target));
213        }
214    }
215
216    Ok(hits.into_iter().collect())
217}
218
219/// Outgoing edges from `path`: the wiki-link targets extracted from that single
220/// file. Loop-fast; follow the evidence chain.
221///
222/// `path` is the store-relative path of the file to read. Targets are returned
223/// as store-relative paths (bare, no `.md`), deduped and sorted; the file's
224/// links to itself are dropped. A missing file yields an empty list (a
225/// dangling seed has no outgoing edges to report — broken-link detection is
226/// [`crate::validate`]'s job).
227pub fn forwardlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
228    let self_key = edge_key(&normalize_target(path));
229    let abs = match resolve_existing(store, path) {
230        Some(a) => a,
231        None => return Ok(Vec::new()),
232    };
233    // Decode the body LOSSILY (bytes -> `from_utf8_lossy`): wiki-link syntax
234    // (`[[...]]`) is ASCII, so a non-UTF8 byte elsewhere on a line cannot hide an
235    // edge. This mirrors the unscoped backlink scanner
236    // ([`Store::find_links_to_any`], which reads bytes + lossy by design) so
237    // SCOPED backlinks (which ride `forwardlinks`) agree with unscoped backlinks
238    // on a Latin-1-imported file instead of silently dropping its edges — a
239    // `read_to_string` that errored on `InvalidData` returned NO edges.
240    let body = match std::fs::read(&abs) {
241        Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
242        Err(e) => return Err(StoreError::Io(e)),
243    };
244
245    let mut out: BTreeSet<PathBuf> = BTreeSet::new();
246    for target in extract_link_targets(&body) {
247        // Self-link drop is case-folded so a case-variant self-reference is also
248        // excluded on a case-insensitive filesystem.
249        if target.is_empty() || edge_key(&target) == self_key {
250            continue;
251        }
252        out.insert(PathBuf::from(target));
253    }
254    Ok(out.into_iter().collect())
255}
256
257/// The candidate set for an incoming-edge scan: the sidecar records that could
258/// link to the target, read from the type-folder `index.jsonl` sidecars (never
259/// a content-tree walk). `types`/`layer` narrow *which* sidecars are read — the
260/// I/O scope that keeps a typed/layer backlinks O(entities-in-layer).
261///
262/// - `types` non-empty: for each type, read **the whole layer** the type belongs
263///   to ([`layer_for_type`] → [`Store::sidecar_records`]) and keep the records of
264///   that `type`, unioned by path across the requested types. A `layer` filter,
265///   when given, intersects with the type's own layer (a type lives in exactly
266///   one layer, so a mismatched `--in` simply yields no candidates).
267/// - `types` empty: every sidecar record under `layer` (or store-wide when
268///   `None`) via [`Store::sidecar_records`].
269///
270/// **Why the whole layer, not just the type's canonical folder.** A `type` can
271/// legitimately span several folders within one layer — a conclusion `profile`
272/// is the canonical case (it lives under `records/profiles/` by default, but an
273/// agent may file one under any other `records/<folder>/`: `records/people/`,
274/// `records/projects/`, …). Reading only the single canonical-guess folder
275/// (`records/profiles/`) would silently drop every profile filed elsewhere in the
276/// layer, so a scoped `backlinks --type profile` would under-report dependents the
277/// moment that canonical folder exists — breaking the docstring's promise that the
278/// scoped edge set equals the unscoped one. Reading the type's full layer subtree
279/// and filtering by `type` is complete and still O(entities-in-layer), the
280/// sanctioned loop scope.
281fn candidate_records(
282    store: &Store,
283    types: &[String],
284    layer: Option<Layer>,
285) -> Result<Vec<IndexRecord>, StoreError> {
286    if types.is_empty() {
287        return store.sidecar_records(layer);
288    }
289    let mut by_path: std::collections::BTreeMap<PathBuf, IndexRecord> =
290        std::collections::BTreeMap::new();
291    for type_ in types {
292        // A type lives in exactly one layer; read that whole layer's sidecars so
293        // a record filed under a non-canonical folder of the same type (e.g. a
294        // `profile` under `records/people/` rather than `records/profiles/`) is
295        // still a candidate. An explicit `--in` layer that disagrees with the type's
296        // layer can never match the type, so skip the read entirely.
297        let type_layer = layer_for_type(type_);
298        if let Some(scope) = layer {
299            if scope != type_layer {
300                continue;
301            }
302        }
303        for rec in store.sidecar_records(Some(type_layer))? {
304            if rec.type_ == *type_ {
305                by_path.insert(rec.path.clone(), rec);
306            }
307        }
308    }
309    Ok(by_path.into_values().collect())
310}
311
312/// True if the store file at `rel` carries a wiki-link whose canonical target
313/// equals `target`. Delegates to [`forwardlinks`] so the incoming-edge predicate
314/// is *exactly* the outgoing-edge extraction — body + every frontmatter field —
315/// keeping the two directions on one edge set. `forwardlinks` already emits
316/// canonical bare targets, so `target` (likewise normalized by the caller) is
317/// compared directly. A missing/binary file links to nothing.
318fn file_links_to(store: &Store, rel: &Path, target: &str) -> Result<bool, StoreError> {
319    let edges = forwardlinks(store, rel)?;
320    let target_key = edge_key(target);
321    // Compare on the case-folded edge key so a case-variant link (e.g.
322    // `[[records/contacts/Sarah-Chen]]` to `sarah-chen.md`) is confirmed on a
323    // case-insensitive filesystem, agreeing with the unscoped scan and validate.
324    Ok(edges
325        .iter()
326        .any(|e| edge_key(&e.to_string_lossy()) == target_key))
327}
328
329/// **Context hydration.** Bounded BFS from `seed` over backlinks + forwardlinks
330/// out to `hops`, reading each reached file's `summary` + relationship, and
331/// returning a readable [`ContextSlice`]. Optionally filtered by `types` and
332/// `direction`. On-demand; no maintained graph. What the agent reaches for to
333/// assemble a working set in one call.
334///
335/// Traversal semantics:
336/// - **`hops`** bounds true graph distance from the seed. `hops == 0` returns
337///   an empty slice (the seed alone is no context).
338/// - **`direction`** selects which edges are followed: `Incoming` walks
339///   backlinks, `Outgoing` walks forwardlinks, `Both` walks the union.
340/// - **`types`**, when non-empty, filters which reached nodes appear in the
341///   slice — but traversal still passes *through* off-type nodes, so a
342///   `meeting` two hops out is still reachable through a `contact` even when
343///   filtering to `meeting`. (An empty `types` slice imposes no filter.)
344/// - Each node records the lowest hop count at which it is first reached (BFS
345///   order); the seed is never included as a node.
346///
347/// Unbounded traversal: delegates to [`neighborhood_capped`] with no node cap, so
348/// it expands every reachable node within `hops`. For a densely-interlinked store
349/// this is one full-store backlinks scan **per reached node** (O(visited × store))
350/// — prefer [`neighborhood_capped`] with a `max_nodes` cap to bound that work.
351pub fn neighborhood(
352    store: &Store,
353    seed: &Path,
354    hops: u32,
355    types: &[String],
356    direction: Direction,
357) -> Result<ContextSlice, StoreError> {
358    neighborhood_capped(store, seed, hops, types, direction, None)
359}
360
361/// [`neighborhood`] with a hard cap on how many nodes the BFS **traverses**.
362///
363/// `max_nodes` bounds the *traversal*, not just the result: each node the BFS
364/// expands triggers a per-node incoming-edge scan (an unscoped [`backlinks`] is a
365/// full-store ripgrep pass), so an uncapped neighborhood of a hub node costs
366/// O(visited × store). A post-hoc `.take(n)` on the returned nodes caps the
367/// *output* but not that work — the scans still run for every reached node. This
368/// cap stops discovering (and therefore stops scanning) once `max_nodes` distinct
369/// non-seed nodes have entered the BFS, so the expensive per-node scans are bounded
370/// to at most `max_nodes` of them. `None` is unbounded (the [`neighborhood`]
371/// behavior).
372///
373/// The cap is applied at *discovery* in BFS order, so the kept nodes are exactly
374/// the first `max_nodes` reached (closest-first by hop), and each still records its
375/// true minimum hop distance. Type-filtered (off-type) nodes count against the cap
376/// because the BFS must still traverse *through* them to reach deeper on-type
377/// nodes — the scan cost is paid when a node is expanded, on- or off-type alike.
378pub fn neighborhood_capped(
379    store: &Store,
380    seed: &Path,
381    hops: u32,
382    types: &[String],
383    direction: Direction,
384    max_nodes: Option<usize>,
385) -> Result<ContextSlice, StoreError> {
386    let seed_rel = PathBuf::from(normalize_target(seed));
387    let type_filter: HashSet<&str> = types.iter().map(|s| s.as_str()).collect();
388
389    // `discovered` guards against revisiting a node (and against re-adding the
390    // seed). BFS by levels so the first time we reach a node is its true min
391    // hop distance.
392    let mut discovered: HashSet<PathBuf> = HashSet::new();
393    discovered.insert(seed_rel.clone());
394
395    let mut nodes: Vec<ContextNode> = Vec::new();
396    let mut frontier: VecDeque<PathBuf> = VecDeque::new();
397    frontier.push_back(seed_rel.clone());
398
399    // Count of distinct non-seed nodes admitted to the BFS. Once it hits
400    // `max_nodes` we stop discovering new nodes, which stops enqueuing them, which
401    // stops the per-node full-store backlinks scan they would have triggered — the
402    // cap bounds the *traversal cost*, not only the printed result.
403    let mut admitted = 0usize;
404    let cap_reached = |admitted: usize| max_nodes.is_some_and(|cap| admitted >= cap);
405
406    let mut hop = 0u32;
407    while hop < hops && !frontier.is_empty() && !cap_reached(admitted) {
408        hop += 1;
409        let level_size = frontier.len();
410        for _ in 0..level_size {
411            if cap_reached(admitted) {
412                break;
413            }
414            let current = frontier.pop_front().expect("frontier non-empty");
415
416            // Collect this node's edges in the requested direction(s). Each
417            // edge carries the neighbor path + the direction we traversed it.
418            let mut edges: Vec<(PathBuf, Direction)> = Vec::new();
419            if matches!(direction, Direction::Outgoing | Direction::Both) {
420                for nbr in forwardlinks(store, &current)? {
421                    edges.push((nbr, Direction::Outgoing));
422                }
423            }
424            if matches!(direction, Direction::Incoming | Direction::Both) {
425                for nbr in backlinks(store, &current)? {
426                    edges.push((nbr, Direction::Incoming));
427                }
428            }
429
430            for (neighbor, dir) in edges {
431                if cap_reached(admitted) {
432                    break;
433                }
434                if !discovered.insert(neighbor.clone()) {
435                    continue;
436                }
437                admitted += 1;
438                let (summary, type_) = read_summary_and_type(store, &neighbor);
439                let include = type_filter.is_empty()
440                    || type_
441                        .as_deref()
442                        .map(|t| type_filter.contains(t))
443                        .unwrap_or(false);
444                if include {
445                    nodes.push(ContextNode {
446                        path: neighbor.clone(),
447                        summary,
448                        type_,
449                        hops: hop,
450                        via: Some((current.clone(), dir)),
451                    });
452                }
453                // Off-type nodes are not emitted but still seed the next BFS
454                // level, so the type filter narrows the *result*, not the
455                // reachable graph.
456                frontier.push_back(neighbor);
457            }
458        }
459    }
460
461    Ok(ContextSlice {
462        seed: seed_rel,
463        nodes,
464    })
465}
466
467/// **SWEEP.** Content files with no incoming AND no outgoing wiki-links — the
468/// curation worklist ("ingested but not yet wired into the wiki"). Off the
469/// loop. Optionally scoped to a layer.
470///
471/// A file is an orphan iff it neither links out to another store file nor is
472/// linked to by one. Incoming edges are counted across the *whole* store
473/// (a link from any layer un-orphans a file), even when `layer` scopes the
474/// candidate set. Returns store-relative paths, sorted.
475pub fn orphans(store: &Store, layer: Option<Layer>) -> Result<Vec<PathBuf>, StoreError> {
476    // One walk of the whole store: for every content file, record (a) whether
477    // it has any outgoing link, and (b) accumulate the set of every target any
478    // file links to (its incoming-edge set). Both come from a single read per
479    // file — the SWEEP cost.
480    let all = walk_content_files(store)?;
481
482    // `linked_to` holds case-folded edge KEYS (not raw paths): the link text may
483    // spell a target with different casing than the on-disk file (e.g.
484    // `[[records/contacts/Sarah-Chen]]` → `sarah-chen.md`), and on a
485    // case-insensitive filesystem that is a real incoming edge. Keying on
486    // `edge_key` so the incoming-edge lookup case-folds is what stops the
487    // false-positive orphan (a file with a live case-variant link reported as
488    // orphaned) — and matches validate, which resolves the same link via the
489    // case-insensitive filesystem.
490    let mut linked_to: HashSet<String> = HashSet::new();
491    let mut has_outgoing: HashMap<PathBuf, bool> = HashMap::new();
492
493    for abs in &all {
494        let rel = match rel_path(store, abs) {
495            Some(r) => r,
496            None => continue,
497        };
498        let self_key = edge_key(&normalize_target(&rel));
499
500        // Lossy decode (see `forwardlinks`): a non-UTF8 byte must not hide a
501        // `[[...]]` edge, or `orphans` would over-report BOTH endpoints of a live
502        // edge as orphans (and `stats` would inflate the orphan count) on a file
503        // with a stray Latin-1 byte beside a valid ASCII link line.
504        let body = match std::fs::read(abs) {
505            Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
506            Err(e) => return Err(StoreError::Io(e)),
507        };
508
509        let mut outgoing = false;
510        for target in extract_link_targets(&body) {
511            if target.is_empty() || edge_key(&target) == self_key {
512                continue;
513            }
514            if resolve_existing(store, Path::new(&target)).is_none() {
515                continue;
516            }
517            outgoing = true;
518            linked_to.insert(edge_key(&target));
519        }
520        has_outgoing.insert(rel, outgoing);
521    }
522
523    let mut out: BTreeSet<PathBuf> = BTreeSet::new();
524    for abs in &all {
525        let rel = match rel_path(store, abs) {
526            Some(r) => r,
527            None => continue,
528        };
529        if let Some(layer) = layer {
530            if path_layer(&rel) != Some(layer) {
531                continue;
532            }
533        }
534        let outgoing = has_outgoing.get(&rel).copied().unwrap_or(false);
535        let incoming = linked_to.contains(&edge_key(&normalize_target(&rel)));
536        if !outgoing && !incoming {
537            out.insert(rel);
538        }
539    }
540
541    Ok(out.into_iter().collect())
542}
543
544/// **Write-side.** Rewrite every incoming `[[old]]` wiki-link in `text` to
545/// `[[new]]`, preserving any `|display` override and emitting the canonical bare
546/// target (no `.md`). The write-side twin of [`backlinks`]: where `backlinks`
547/// *finds* the files carrying an edge to `old`, this *retargets* that edge to
548/// `new` inside one file's contents.
549///
550/// `old` and `new` are store-relative paths in the wiki-link sense — both are
551/// passed through the same [`normalize_target`] the read side keys on, so the
552/// `.md` and bare spellings of `old` collapse to one target and a match here is
553/// exactly a match [`backlinks`] / [`Store::find_links_to`](crate::Store::find_links_to)
554/// would report. A link is rewritten iff its normalized target equals
555/// `normalize_target(old)`; prefix collisions (`old=a/b` vs `[[a/bc]]`) and
556/// short-form links never match. Returns the rewritten text (identical to the
557/// input when nothing matched), so the caller can cheaply detect a no-op.
558///
559/// Operates on the raw text (not a parser round-trip) so a link in frontmatter
560/// or body is retargeted uniformly and nothing else is reflowed — **except** a
561/// `[[...]]` inside a ``` fenced code block, which is a documentation example,
562/// not an edge: `rename` must NOT mutate fenced verbatim content (validate
563/// treats fenced links as non-edges, so rewriting them silently corrupts the
564/// example and makes rename disagree with validate). Matching is fence-aware,
565/// whitespace-trimmed, and case-folded to the filesystem, the exact edge notion
566/// [`backlinks`]/[`forwardlinks`] use — so rename retargets precisely the edges
567/// those report and nothing else.
568pub fn rewrite_links_to(text: &str, old: &Path, new: &Path) -> String {
569    let old_target = normalize_target(old);
570    let new_target = normalize_target(new);
571    if old_target.is_empty() {
572        // No target to match → never rewrite anything.
573        return text.to_string();
574    }
575    let old_key = edge_key(&old_target);
576
577    let mut out = String::with_capacity(text.len());
578    // Track the fence as a `(char, run length)` exactly like validate and
579    // `extract_edge_targets` (NOT a bool toggled on any ``` / ~~~ line). The
580    // naive toggle flips mid-block on a nested/indented/long-run fence, so a
581    // fenced example link would be rewritten — corrupting documentation and
582    // making rename disagree with validate's edge notion.
583    let mut fence: Option<(u8, usize)> = None;
584    // `split_inclusive` keeps each line's trailing `\n`, so copying a chunk
585    // verbatim preserves the original line endings exactly.
586    for line in text.split_inclusive('\n') {
587        // The fence rules key on line content without trailing `\r`/`\n`; the
588        // full chunk (line endings intact) is what we copy verbatim.
589        let content = line.trim_end_matches('\n').trim_end_matches('\r');
590        if let Some(f) = fence {
591            // Inside a fenced code block: copy verbatim, never rewrite. Only a
592            // matching closing fence ends the block.
593            if fence_closes(content, f) {
594                fence = None;
595            }
596            out.push_str(line);
597            continue;
598        }
599        if let Some(opened) = fence_opens(content) {
600            fence = Some(opened);
601            out.push_str(line);
602            continue;
603        }
604        rewrite_links_in_line(line, &old_key, &new_target, &mut out);
605    }
606    out
607}
608
609/// Rewrite every `[[...]]` on a single (non-fenced) line whose target matches
610/// `old_key`, appending the result to `out`. Preserves any `|display` override
611/// verbatim and emits the canonical bare `new_target`. A `[[...]]` whose target
612/// does not match (a prefix sibling, the short form, an unrelated target) is
613/// copied through untouched.
614fn rewrite_links_in_line(line: &str, old_key: &str, new_target: &str, out: &mut String) {
615    let bytes = line.as_bytes();
616    let mut i = 0usize;
617    let mut last = 0usize;
618    while i + 1 < bytes.len() {
619        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
620            if let Some(close) = line[i + 2..].find("]]") {
621                let inner = &line[i + 2..i + 2 + close];
622                // An embedded newline means this isn't a single-line link.
623                if !inner.contains('\n') {
624                    let (raw_target, display) = match inner.split_once('|') {
625                        Some((t, d)) => (t, Some(d)),
626                        None => (inner, None),
627                    };
628                    let raw_target = raw_target.trim();
629                    // Match on the SAME edge key the read side uses, so `[[old]]`,
630                    // `[[old.md]]`, `[[ ./old ]]`, and (case-insensitive FS)
631                    // `[[Old]]` all retarget while `[[old-jr]]` never does.
632                    if !raw_target.is_empty()
633                        && !raw_target.starts_with('[')
634                        && edge_key(&canonical_link_target(raw_target)) == old_key
635                    {
636                        out.push_str(&line[last..i]);
637                        out.push_str("[[");
638                        out.push_str(new_target);
639                        if let Some(display) = display {
640                            out.push('|');
641                            out.push_str(display);
642                        }
643                        out.push_str("]]");
644                        i = i + 2 + close + 2;
645                        last = i;
646                        continue;
647                    }
648                }
649                // Not a matching link: skip past this `]]` so an inner `[[`
650                // isn't re-scanned, but leave the text for the verbatim copy.
651                i = i + 2 + close + 2;
652                continue;
653            }
654        }
655        i += 1;
656    }
657    out.push_str(&line[last..]);
658}
659
660// ── Private helpers ─────────────────────────────────────────────────────────
661
/// Normalize a store-relative path into the canonical wiki-link target form:
/// forward slashes, no leading `./` or `/`, and no trailing `.md`. This is the
/// canonical (case-PRESERVING) identity used for output and rewrites; edge
/// *comparisons* go through [`edge_key`] so the `.md`/bare forms AND (on a
/// case-insensitive filesystem) case-variant spellings of a target unify. The
/// shared [`canonical_link_target`] is the single definition every db.md
/// link op keys on.
///
/// This function is only the `Path` → `&str` adapter in front of
/// [`canonical_link_target`]; all of the normalization rules listed above live
/// in that shared helper, not here.
fn normalize_target(path: &Path) -> String {
    // `to_string_lossy` never fails: a path that is not valid UTF-8 has the
    // offending sequences replaced with U+FFFD before canonicalization.
    canonical_link_target(&path.to_string_lossy())
}
672
/// The comparison key for an edge: the canonical target case-folded to the
/// filesystem (identity on a case-sensitive FS, lowercased on macOS/Windows), so
/// the string-keyed graph compares agree with the filesystem's case-insensitive
/// `is_file()` resolution. `[[records/contacts/Sarah-Chen]]` and the on-disk
/// `sarah-chen.md` must be the same edge on a case-insensitive filesystem or
/// backlinks/orphans/rename silently disagree with validate.
///
/// Expects an already-canonical target (see [`normalize_target`]); the folding
/// itself is delegated to the shared `link_edge_key`, so the behavior described
/// above is that helper's contract rather than logic implemented here.
fn edge_key(canonical_target: &str) -> String {
    link_edge_key(canonical_target)
}
682
683/// Extract every wiki-link target from a body, normalized to the canonical
684/// store-relative form. Fence-aware and whitespace-trimmed via the shared
685/// [`extract_edge_targets`] — a `[[...]]` inside a ``` fenced code block is a
686/// documentation example, NOT an edge (matching validate), and `[[ x ]]`
687/// padding resolves identically to `[[x]]`. A target that would escape the store
688/// root (a `..` component) is dropped here too, so an escaping `[[../outside/x]]`
689/// is never reported as a forward edge and never seeds a [`neighborhood`]
690/// traversal out of the store (the disclosure vector validate flags as an
691/// error). Order-preserving; duplicates kept (callers dedup).
692fn extract_link_targets(body: &str) -> Vec<String> {
693    extract_edge_targets(body)
694        .into_iter()
695        .filter(|t| is_within_store_target(t))
696        .collect()
697}
698
/// Whether a canonical target is a plain store-relative path.
///
/// Returns `true` only when every path component is a `Normal` one. A `..`
/// (`ParentDir`) component is an escape and is rejected; so are a root, a
/// platform prefix, and a leading `.`, although the canonical form is expected
/// to have stripped any leading `./` or `/` already. An empty target has no
/// components at all and therefore passes.
fn is_within_store_target(target: &str) -> bool {
    for part in Path::new(target).components() {
        match part {
            std::path::Component::Normal(_) => {}
            // Anything else (`..`, `.`, root, prefix) is not a safe relative key.
            _ => return false,
        }
    }
    true
}
708
709/// Resolve the store root + a store-relative path to the absolute on-disk file,
710/// trying the path as written and then with a `.md` extension. `None` if neither
711/// exists **or if the target resolves outside the store root** — a `..`-laden or
712/// symlink-escaping wiki-link must never turn a graph read/traversal into a read
713/// of an arbitrary file outside the store (the `dbmd graph neighborhood`
714/// disclosure vector). Containment is enforced via the shared
715/// [`ensure_path_within_store`] gate, matching validate's safe-path guard.
716fn resolve_existing(store: &Store, store_relative: &Path) -> Option<PathBuf> {
717    let direct = store.root.join(store_relative);
718    if direct.is_file() && resolves_within_store(store, store_relative, &direct) {
719        return Some(direct);
720    }
721    let normalized = normalize_target(store_relative);
722    let with_md = store.root.join(format!("{normalized}.md"));
723    if with_md.is_file() && resolves_within_store(store, Path::new(&normalized), &with_md) {
724        return Some(with_md);
725    }
726    None
727}
728
729/// Containment check for a candidate on-disk path, with a cheap fast path. A
730/// store-relative path made of only `Normal` components (no `..`, no absolute /
731/// platform prefix) is trivially inside the root, so the common case avoids the
732/// `canonicalize` syscalls entirely. Anything with a `..`/absolute/prefix
733/// component falls through to the authoritative [`ensure_path_within_store`]
734/// gate (symlink-resolving), which is the only thing that can prove an escaping
735/// or symlink-redirected path actually stays inside the store.
736fn resolves_within_store(store: &Store, store_relative: &Path, abs: &Path) -> bool {
737    let plain_relative = !store_relative.is_absolute()
738        && store_relative
739            .components()
740            .all(|c| matches!(c, std::path::Component::Normal(_)));
741    if plain_relative {
742        return true;
743    }
744    ensure_path_within_store(&store.root, abs).is_ok()
745}
746
747/// Convert an absolute path under the store root into its store-relative form.
748fn rel_path(store: &Store, abs: &Path) -> Option<PathBuf> {
749    abs.strip_prefix(&store.root).ok().map(|p| p.to_path_buf())
750}
751
752/// Which layer a store-relative path sits in, by its first component.
753fn path_layer(rel: &Path) -> Option<Layer> {
754    let first = rel.components().next()?;
755    match first.as_os_str().to_str()? {
756        "sources" => Some(Layer::Sources),
757        "records" => Some(Layer::Records),
758        _ => None,
759    }
760}
761
762/// True if a store-relative path is a *content* file: under `sources/` or
763/// `records/`, a `.md` file, and not an `index.md`. Meta files
764/// (`DB.md`, `log.md`, `log/…`, sidecars) are excluded.
765fn is_content_rel(rel: &Path) -> bool {
766    if path_layer(rel).is_none() {
767        return false;
768    }
769    match rel.extension().and_then(|e| e.to_str()) {
770        Some("md") => {}
771        _ => return false,
772    }
773    rel.file_name().and_then(|n| n.to_str()) != Some("index.md")
774}
775
776/// Walk every content `.md` file in the store via the **`ignore`** walker
777/// (the ripgrep directory engine). Only the two layer roots
778/// (`sources/`/`records/`) are descended, so `DB.md`, `log.md`, and
779/// `log/` at the store root are structurally never reached; hidden dirs and
780/// per-folder `index.md` sidecars are filtered out ([`is_content_rel`]). Honors
781/// `.gitignore` the way `rg` does. Returns absolute paths. SWEEP-class.
782fn walk_content_files(store: &Store) -> Result<Vec<PathBuf>, StoreError> {
783    let mut out = Vec::new();
784    for layer in Layer::all() {
785        let dir = store.root.join(layer_dir_name(layer));
786        if !dir.is_dir() {
787            continue;
788        }
789        let walker = WalkBuilder::new(&dir)
790            .hidden(true)
791            .git_ignore(true)
792            .git_global(false)
793            .require_git(false)
794            // Follow symlinks so a symlinked `.md` content file or a symlinked
795            // type folder is walked like any other content (consistent with the
796            // store SWEEP walker), rather than silently vanishing from orphans.
797            .follow_links(true)
798            .build();
799        for result in walker {
800            let entry = result.map_err(|e| StoreError::Search {
801                root: store.root.clone(),
802                message: format!("walk failed: {e}"),
803            })?;
804            // A followed symlink entry reports its own type as `is_symlink()`, so
805            // also accept a symlink whose target is a regular file.
806            let is_file = match entry.file_type() {
807                Some(ft) if ft.is_file() => true,
808                Some(ft) if ft.is_symlink() => std::fs::metadata(entry.path())
809                    .map(|m| m.is_file())
810                    .unwrap_or(false),
811                _ => false,
812            };
813            if !is_file {
814                continue;
815            }
816            let abs = entry.into_path();
817            if let Some(rel) = rel_path(store, &abs) {
818                if is_content_rel(&rel) {
819                    out.push(abs);
820                }
821            }
822        }
823    }
824    Ok(out)
825}
826
/// The on-disk folder name for a layer. Mirrors `Layer::dir_name`; kept local
/// so the graph module owns its own copy rather than coupling to that body.
///
/// These names are the same literals [`path_layer`] matches on when mapping a
/// path's first component back to a layer, so the two must change together.
fn layer_dir_name(layer: Layer) -> &'static str {
    // Exhaustive on purpose: adding a `Layer` variant must fail to compile here
    // rather than silently fall into a default folder.
    match layer {
        Layer::Sources => "sources",
        Layer::Records => "records",
    }
}
835
836/// Read a reached node's `summary` and `type` from its frontmatter. A missing
837/// file, missing frontmatter, or unparseable YAML degrades to an empty summary
838/// / unknown type rather than failing the whole hydration — `neighborhood` is
839/// best-effort context assembly, not validation.
840fn read_summary_and_type(store: &Store, rel: &Path) -> (String, Option<String>) {
841    let abs = match resolve_existing(store, rel) {
842        Some(a) => a,
843        None => return (String::new(), None),
844    };
845    // Lossy decode so a node's summary/type still resolve when the file carries
846    // a stray non-UTF8 byte (consistent with the edge readers above).
847    let text = match std::fs::read(&abs) {
848        Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
849        Err(_) => return (String::new(), None),
850    };
851    let yaml = match frontmatter_block(&text) {
852        Some(y) => y,
853        None => return (String::new(), None),
854    };
855    let value: serde_norway::Value = match serde_norway::from_str(yaml) {
856        Ok(v) => v,
857        Err(_) => return (String::new(), None),
858    };
859    let summary = value
860        .get("summary")
861        .and_then(|v| v.as_str())
862        .unwrap_or("")
863        .to_string();
864    let type_ = value
865        .get("type")
866        .and_then(|v| v.as_str())
867        .map(|s| s.to_string());
868    (summary, type_)
869}
870
/// Slice out the YAML frontmatter of a document.
///
/// The text must begin (after at most one UTF-8 BOM) with an opening `---`
/// line terminated by `\n` or `\r\n`. The returned slice runs from just after
/// that line up to, but not including, the first later line that is exactly
/// `---` once its line ending is removed. Returns `None` when the opening
/// fence is missing or no closing fence follows it.
fn frontmatter_block(text: &str) -> Option<&str> {
    let unmarked = text.strip_prefix('\u{feff}').unwrap_or(text);
    let body = match unmarked.strip_prefix("---\n") {
        Some(tail) => tail,
        None => unmarked.strip_prefix("---\r\n")?,
    };

    // Walk the body one line at a time; `line_start` is the byte offset of the
    // line currently under inspection, which is also where the YAML ends if
    // that line is the closing fence.
    let mut line_start = 0usize;
    while line_start < body.len() {
        let line_end = match body[line_start..].find('\n') {
            Some(newline) => line_start + newline + 1,
            None => body.len(),
        };
        let line = &body[line_start..line_end];
        if line.trim_end_matches(['\r', '\n']) == "---" {
            return Some(&body[..line_start]);
        }
        line_start = line_end;
    }
    None
}
891
892#[cfg(test)]
893mod tests {
894    use super::*;
895    use std::fs;
896    use tempfile::TempDir;
897
898    use crate::parser::Config;
899
900    // ── Fixture builder ─────────────────────────────────────────────────────
901    //
902    // A real on-disk store in a tempdir. We write actual files (frontmatter +
903    // wiki-links) and exercise the real code paths. The fixture constructs the
904    // `Store` by its public fields rather than `Store::open`, so the graph
905    // tests stand on their own and do not depend on any other module's
906    // behavior. Each test asserts the behavior the SPEC promises, derived from
907    // intent, never from echoing the function's own output.
908    //
909    // `backlinks` (and `neighborhood` in any incoming direction) enumerate their
910    // candidate set from the type-folder `index.jsonl` sidecars — the loop
911    // contract: never a whole-store content walk. A real db.md store maintains
912    // those sidecars write-through, so a test that exercises backlinks must call
913    // [`Fixture::reindex`] after writing its files to build them (the SWEEP that
914    // `dbmd index rebuild` runs). Forwardlinks/orphans read content directly and
915    // need no sidecar.
916
    /// A throwaway on-disk store for a single test.
    struct Fixture {
        /// Handle to the temp directory that `store.root` points into. It is
        /// never read (hence the `_` prefix); it is held so the directory lives
        /// as long as the fixture — presumably it is removed when this is
        /// dropped, per `tempfile`'s `TempDir`.
        _tmp: TempDir,
        /// The store under test, rooted at the temp directory.
        store: Store,
    }
921
922    impl Fixture {
923        fn new() -> Self {
924            let tmp = TempDir::new().expect("tempdir");
925            let root = tmp.path().to_path_buf();
926            fs::write(root.join("DB.md"), "---\ntype: db-md\n---\n# store\n").expect("DB.md");
927            let store = Store {
928                root,
929                config: Config::default(),
930            };
931            Fixture { _tmp: tmp, store }
932        }
933
934        /// Write a content file at a store-relative path with the given type,
935        /// summary, and body. Creates parent dirs.
936        fn write(&self, rel: &str, type_: &str, summary: &str, body: &str) {
937            let abs = self.store.root.join(rel);
938            fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
939            let contents = format!(
940                "---\ntype: {type_}\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: {summary}\n---\n{body}\n"
941            );
942            fs::write(&abs, contents).expect("write file");
943        }
944
945        /// Write a raw file verbatim (for frontmatter-shape edge cases).
946        fn write_raw(&self, rel: &str, contents: &str) {
947            let abs = self.store.root.join(rel);
948            fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
949            fs::write(&abs, contents).expect("write raw");
950        }
951
952        /// Build the type-folder `index.jsonl` sidecars from the content written
953        /// so far — the state a real store is always in (write-through), and the
954        /// candidate set `backlinks` reads. Call after writing files in any test
955        /// that exercises `backlinks` or an incoming-direction `neighborhood`.
956        fn reindex(&self) {
957            crate::index::Index::rebuild_all(&self.store).expect("rebuild sidecars");
958        }
959
960        fn p(&self, rel: &str) -> PathBuf {
961            PathBuf::from(rel)
962        }
963    }
964
965    fn paths(v: &[PathBuf]) -> Vec<String> {
966        v.iter()
967            .map(|p| p.to_string_lossy().replace('\\', "/"))
968            .collect()
969    }
970
971    // ── normalize_target ────────────────────────────────────────────────────
972
973    #[test]
974    fn normalize_strips_md_and_leading_dotslash() {
975        assert_eq!(
976            normalize_target(Path::new("records/contacts/sarah.md")),
977            "records/contacts/sarah"
978        );
979        assert_eq!(
980            normalize_target(Path::new("./records/profiles/elena")),
981            "records/profiles/elena"
982        );
983        assert_eq!(normalize_target(Path::new("/records/x")), "records/x");
984        // Bare and `.md` forms must collapse to the same key, or edges won't unify.
985        assert_eq!(
986            normalize_target(Path::new("a/b")),
987            normalize_target(Path::new("a/b.md"))
988        );
989    }
990
991    // ── extract_link_targets (forwardlinks core) ────────────────────────────
992
993    #[test]
994    fn extract_handles_display_text_and_md_suffix() {
995        let body = "See [[records/profiles/sarah-chen|Sarah]] and [[records/contacts/elena.md]].";
996        let got = extract_link_targets(body);
997        assert_eq!(
998            got,
999            vec!["records/profiles/sarah-chen", "records/contacts/elena"]
1000        );
1001    }
1002
1003    #[test]
1004    fn extract_ignores_external_markdown_links() {
1005        // Standard markdown links are NOT wiki-links and must not be extracted
1006        // (SPEC: external refs don't participate in the graph).
1007        let body = "[Acme](https://acme.io) but [[records/companies/acme]] is internal.";
1008        let got = extract_link_targets(body);
1009        assert_eq!(got, vec!["records/companies/acme"]);
1010    }
1011
1012    #[test]
1013    fn extract_display_text_is_not_treated_as_a_target() {
1014        // A `|display` segment that looks path-like must not become a target;
1015        // only the part before `|` is the link target.
1016        let body = "[[records/contacts/sarah|sources/emails/decoy]]";
1017        let got = extract_link_targets(body);
1018        assert_eq!(got, vec!["records/contacts/sarah"]);
1019    }
1020
1021    // ── rewrite_links_to (write-side twin of backlinks) ─────────────────────
1022
1023    #[test]
1024    fn rewrite_plain_link_to_canonical_new_target() {
1025        let got = rewrite_links_to(
1026            "See [[records/contacts/sarah-chen]] today.",
1027            Path::new("records/contacts/sarah-chen"),
1028            Path::new("records/contacts/sarah-chen-acme"),
1029        );
1030        assert_eq!(got, "See [[records/contacts/sarah-chen-acme]] today.");
1031    }
1032
1033    #[test]
1034    fn rewrite_preserves_display_override() {
1035        let got = rewrite_links_to(
1036            "With [[records/contacts/sarah-chen|Sarah]].",
1037            Path::new("records/contacts/sarah-chen"),
1038            Path::new("records/contacts/sarah-chen-acme"),
1039        );
1040        assert_eq!(got, "With [[records/contacts/sarah-chen-acme|Sarah]].");
1041    }
1042
1043    #[test]
1044    fn rewrite_matches_md_suffixed_old_and_emits_bare_new() {
1045        // The `.md` spelling of the old target must match (it normalizes to the
1046        // same key the read side uses), and the new target is emitted bare —
1047        // the writer doctrine validate enforces (`WIKI_LINK_HAS_EXTENSION`).
1048        let got = rewrite_links_to(
1049            "[[records/contacts/sarah-chen.md]]",
1050            Path::new("records/contacts/sarah-chen"),
1051            Path::new("records/contacts/new.md"),
1052        );
1053        assert_eq!(got, "[[records/contacts/new]]");
1054    }
1055
1056    #[test]
1057    fn rewrite_leaves_prefix_collisions_and_short_form_untouched() {
1058        // Boundary correctness, anchored to the SAME normalize_target the read
1059        // side keys on: `records/contacts/sarah-chen` must NOT match the longer
1060        // `[[…-jr]]`, the short-form `[[sarah-chen]]`, or an unrelated target.
1061        let input = "[[records/contacts/sarah-chen-jr]] [[sarah-chen]] [[records/concepts/x]]";
1062        let got = rewrite_links_to(
1063            input,
1064            Path::new("records/contacts/sarah-chen"),
1065            Path::new("records/contacts/new"),
1066        );
1067        assert_eq!(got, input, "no genuine edge to the seed → text unchanged");
1068    }
1069
1070    #[test]
1071    fn rewrite_handles_multiple_occurrences_and_mixed_spellings() {
1072        let got = rewrite_links_to(
1073            "[[records/x]] then [[./records/x]] and [[records/x.md|d]] end",
1074            Path::new("records/x"),
1075            Path::new("records/y"),
1076        );
1077        // All three spellings of the same target retarget; the display survives.
1078        assert_eq!(
1079            got,
1080            "[[records/y]] then [[records/y]] and [[records/y|d]] end"
1081        );
1082    }
1083
1084    #[test]
1085    fn rewrite_retargets_exactly_the_edges_the_core_parser_sees() {
1086        // The load-bearing property of moving the rewrite into core: the write
1087        // side must operate on EXACTLY the edge set the read side recognizes —
1088        // the same `extract_link_targets` / `normalize_target` grammar that
1089        // `forwardlinks` is built on. Anchor the test to that grammar (via
1090        // `forwardlinks` on a real file) rather than re-listing literals, so a
1091        // future divergence between the read parser and the write rewrite fails
1092        // here. (Coupled to `forwardlinks` — the single-file edge extractor —
1093        // not the multi-file `backlinks` traversal, so it tests the grammar, not
1094        // the walk.)
1095        let fx = Fixture::new();
1096        let body = "Met [[records/contacts/sarah.md|Sarah]] and not [[records/contacts/sarah-2]].";
1097        fx.write("records/profiles/bio.md", "profile", "bio", body);
1098
1099        // Read side: the parser sees two outgoing edges, both in canonical bare
1100        // form (the `.md` spelling collapsed). `sarah` is a real edge here.
1101        let edges = forwardlinks(&fx.store, &fx.p("records/profiles/bio.md")).unwrap();
1102        assert_eq!(
1103            paths(&edges),
1104            vec!["records/contacts/sarah", "records/contacts/sarah-2"],
1105            "fixture must contain exactly the two edges this test reasons about"
1106        );
1107
1108        // Write side: rewriting `sarah → sarah-chen` must retarget the edge the
1109        // parser recognized (matching the `.md` spelling), preserve the display,
1110        // and leave the unrelated `sarah-2` edge untouched.
1111        let got = rewrite_links_to(
1112            body,
1113            Path::new("records/contacts/sarah"),
1114            Path::new("records/contacts/sarah-chen"),
1115        );
1116        assert_eq!(
1117            got,
1118            "Met [[records/contacts/sarah-chen|Sarah]] and not [[records/contacts/sarah-2]]."
1119        );
1120
1121        // Cross-check through the parser: the rewritten text's edge set is the
1122        // original with `sarah` swapped for `sarah-chen` — proving the rewrite
1123        // moved exactly one edge, the one the read side keyed on.
1124        fx.write("records/profiles/bio.md", "profile", "bio", &got);
1125        let after = forwardlinks(&fx.store, &fx.p("records/profiles/bio.md")).unwrap();
1126        assert_eq!(
1127            paths(&after),
1128            vec!["records/contacts/sarah-2", "records/contacts/sarah-chen"],
1129            "after rewrite the parser must see the new target and not the old"
1130        );
1131    }
1132
1133    #[test]
1134    fn rewrite_empty_old_target_is_a_no_op() {
1135        // A degenerate `old` (normalizes to empty) must never rewrite anything,
1136        // mirroring backlinks' empty-target guard.
1137        let input = "[[records/x]] [[]] text";
1138        let got = rewrite_links_to(input, Path::new(""), Path::new("records/y"));
1139        assert_eq!(got, input);
1140    }
1141
1142    #[test]
1143    fn rewrite_no_match_returns_input_unchanged() {
1144        let input = "no links, [external](https://x), and [[records/concepts/y]]";
1145        let got = rewrite_links_to(input, Path::new("records/x"), Path::new("records/z"));
1146        assert_eq!(got, input);
1147    }
1148
1149    #[test]
1150    fn rewrite_does_not_corrupt_links_in_nested_or_long_run_fences() {
1151        // Regression for the naive `starts_with("```")/("~~~")` toggle in the
1152        // rewriter: a fenced example documenting wiki-link syntax must be copied
1153        // VERBATIM, never retargeted — matching validate's edge notion. The
1154        // standard nested-fence convention (a ````-run block wrapping a ```
1155        // example) used to flip the bool mid-block, so the example link was
1156        // rewritten (silent documentation corruption).
1157        let body = "\
1158Here is how to write a link:
1159
1160````
1161```
1162[[records/contacts/bob]]
1163```
1164still fenced [[records/contacts/bob]]
1165````
1166
1167Real link: [[records/contacts/bob]].
1168";
1169        let got = rewrite_links_to(
1170            body,
1171            Path::new("records/contacts/bob"),
1172            Path::new("records/contacts/robert"),
1173        );
1174        // The two fenced examples are untouched; only the real link retargets.
1175        let expected = "\
1176Here is how to write a link:
1177
1178````
1179```
1180[[records/contacts/bob]]
1181```
1182still fenced [[records/contacts/bob]]
1183````
1184
1185Real link: [[records/contacts/robert]].
1186";
1187        assert_eq!(
1188            got, expected,
1189            "fenced example links must survive a rename verbatim; only live edges retarget"
1190        );
1191    }
1192
1193    // ── forwardlinks ─────────────────────────────────────────────────────────
1194
1195    #[test]
1196    fn forwardlinks_returns_sorted_deduped_targets_excluding_self() {
1197        let fx = Fixture::new();
1198        fx.write(
1199            "records/projects/renewal.md",
1200            "synthesis",
1201            "Renewal project",
1202            "Links: [[records/contacts/sarah]] [[records/companies/acme]] [[records/contacts/sarah]] and itself [[records/projects/renewal]].",
1203        );
1204        // The targets need not exist on disk for forwardlinks (it reads the one
1205        // file only). Self-links are dropped; duplicates collapse; sorted asc.
1206        let got = forwardlinks(&fx.store, &fx.p("records/projects/renewal.md")).unwrap();
1207        assert_eq!(
1208            paths(&got),
1209            vec!["records/companies/acme", "records/contacts/sarah"]
1210        );
1211    }
1212
1213    #[test]
1214    fn forwardlinks_picks_up_wiki_links_in_frontmatter() {
1215        // SPEC: wiki-links appear in scalar + block-sequence frontmatter fields,
1216        // not just the body. forwardlinks must follow those edges too.
1217        let fx = Fixture::new();
1218        fx.write_raw(
1219            "records/meetings/m1.md",
1220            "---\ntype: meeting\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Renewal sync\ncompany: [[records/companies/acme]]\nattendees:\n  - [[records/contacts/sarah]]\n  - [[records/contacts/elena]]\n---\nNotes about [[records/projects/renewal]].\n",
1221        );
1222        let got = forwardlinks(&fx.store, &fx.p("records/meetings/m1.md")).unwrap();
1223        assert_eq!(
1224            paths(&got),
1225            vec![
1226                "records/companies/acme",
1227                "records/contacts/elena",
1228                "records/contacts/sarah",
1229                "records/projects/renewal",
1230            ]
1231        );
1232    }
1233
1234    #[test]
1235    fn forwardlinks_missing_file_is_empty_not_error() {
1236        let fx = Fixture::new();
1237        let got = forwardlinks(&fx.store, &fx.p("records/profiles/ghost.md")).unwrap();
1238        assert!(got.is_empty());
1239    }
1240
1241    #[test]
1242    fn forwardlinks_resolves_seed_given_without_md_extension() {
1243        let fx = Fixture::new();
1244        fx.write(
1245            "records/profiles/sarah.md",
1246            "profile",
1247            "Sarah bio",
1248            "Works at [[records/companies/acme]].",
1249        );
1250        // Seed passed in bare wiki-link form (no `.md`) must still resolve.
1251        let got = forwardlinks(&fx.store, &fx.p("records/profiles/sarah")).unwrap();
1252        assert_eq!(paths(&got), vec!["records/companies/acme"]);
1253    }
1254
1255    // ── backlinks ──────────────────────────────────────────────────────────
1256
1257    #[test]
1258    fn backlinks_finds_incoming_across_layers_and_link_forms() {
1259        let fx = Fixture::new();
1260        // Target.
1261        fx.write("records/contacts/sarah.md", "contact", "Sarah Chen", "");
1262        // Three different incoming-link spellings, all to the same target.
1263        fx.write(
1264            "records/profiles/sarah.md",
1265            "profile",
1266            "bio",
1267            "See [[records/contacts/sarah]].",
1268        );
1269        fx.write(
1270            "records/meetings/m1.md",
1271            "meeting",
1272            "Renewal call",
1273            "Attendee [[records/contacts/sarah|Sarah]].",
1274        );
1275        fx.write(
1276            "sources/emails/e1.md",
1277            "email",
1278            "Hi",
1279            "From [[records/contacts/sarah.md]] today.",
1280        );
1281        // A file that links to a DIFFERENT contact must not be a backlink.
1282        fx.write(
1283            "records/profiles/other.md",
1284            "profile",
1285            "x",
1286            "[[records/contacts/sarah-2]]",
1287        );
1288        fx.reindex();
1289
1290        // All three link forms ([[x]], [[x|d]], [[x.md]]) resolve to the same
1291        // target and are found; the linkers are returned in canonical bare form.
1292        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1293        assert_eq!(
1294            paths(&got),
1295            vec![
1296                "records/meetings/m1",
1297                "records/profiles/sarah",
1298                "sources/emails/e1",
1299            ]
1300        );
1301    }
1302
1303    #[test]
1304    fn backlinks_and_forwardlinks_round_trip_on_same_key() {
1305        // If A forwardlinks to B, then B backlinks to A — both expressed in the
1306        // identical bare key, so neighborhood can dedup across directions.
1307        let fx = Fixture::new();
1308        fx.write(
1309            "records/profiles/a.md",
1310            "profile",
1311            "A",
1312            "Knows [[records/profiles/b]].",
1313        );
1314        fx.write("records/profiles/b.md", "profile", "B", "");
1315        fx.reindex();
1316        let fwd = forwardlinks(&fx.store, &fx.p("records/profiles/a.md")).unwrap();
1317        let back = backlinks(&fx.store, &fx.p("records/profiles/b.md")).unwrap();
1318        assert_eq!(paths(&fwd), vec!["records/profiles/b"]);
1319        assert_eq!(paths(&back), vec!["records/profiles/a"]);
1320    }
1321
#[test]
fn backlinks_does_not_match_path_prefix_collisions() {
    // Link targets are compared whole, never by prefix: a link to `sam-smith`
    // must not be reported as an incoming edge of `sam`.
    let fx = Fixture::new();
    let fixtures = [
        // The page being queried.
        ("records/contacts/sam.md", "contact", "Sam", ""),
        // Decoy: its target merely starts with `sam`.
        ("records/profiles/x.md", "profile", "x", "[[records/contacts/sam-smith]]"),
        // The one genuine linker.
        ("records/profiles/y.md", "profile", "y", "[[records/contacts/sam]]"),
    ];
    for (file, kind, summary, body) in fixtures {
        fx.write(file, kind, summary, body);
    }
    fx.reindex();

    // The target is deliberately passed here without its `.md` extension.
    let target = fx.p("records/contacts/sam");
    let got = backlinks(&fx.store, &target).unwrap();
    let expected = vec!["records/profiles/y"];
    assert_eq!(paths(&got), expected);
}
1345
#[test]
fn backlinks_excludes_self_reference() {
    // The only link to `overview` lives inside `overview` itself; a page must
    // never be listed among its own backlinks.
    let fx = Fixture::new();
    let page = "records/synthesis/overview.md";
    fx.write(
        page,
        "synthesis",
        "Overview",
        "This page [[records/synthesis/overview]] references itself.",
    );
    fx.reindex();

    let target = fx.p(page);
    let got = backlinks(&fx.store, &target).unwrap();
    assert!(
        got.is_empty(),
        "self-link must not appear as a backlink, got {got:?}"
    );
}
1363
#[test]
fn backlinks_empty_when_nobody_links() {
    // The store does contain a wiki-link, but it points at a different page,
    // so the queried page has no incoming edges at all.
    let fx = Fixture::new();
    let lonely = "records/contacts/lonely.md";
    fx.write(lonely, "contact", "Lonely", "");
    fx.write(
        "records/profiles/unrelated.md",
        "profile",
        "x",
        "[[records/companies/acme]]",
    );
    fx.reindex();

    let target = fx.p(lonely);
    let got = backlinks(&fx.store, &target).unwrap();
    assert!(got.is_empty());
}
1378
#[test]
fn backlinks_ignores_index_and_meta_files() {
    // A folder `index.md` is a catalog listing, not a relationship edge: even
    // though it contains `[[records/contacts/sarah]]`, it must not be reported
    // as one of sarah's backlinks.
    let fx = Fixture::new();
    fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
    let catalog = "---\ntype: index\nscope: folder\nfolder: records/contacts\n---\n- [[records/contacts/sarah]] — Sarah\n";
    fx.write_raw("records/contacts/index.md", catalog);
    fx.reindex();

    let sarah = fx.p("records/contacts/sarah.md");
    let got = backlinks(&fx.store, &sarah).unwrap();
    assert!(got.is_empty(), "index.md must be excluded, got {got:?}");
}
1393
#[test]
fn backlinks_finds_body_only_edge_not_in_frontmatter_links_field() {
    // REGRESSION: a sidecar record's `links` field mirrors only the file's
    // frontmatter `links:` list. Wiki-links written in the body (or in other
    // typed frontmatter fields) are not projected into it, so a backlinks
    // answer derived from `links[]` alone would silently drop this edge. The
    // edge has to be established from the linker file's own content.
    let fx = Fixture::new();
    fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
    // The standup mentions sarah in its body only, so its sidecar record is
    // expected to carry an empty `links` (asserted right below).
    fx.write(
        "records/meetings/standup.md",
        "meeting",
        "Standup",
        "Discussed renewal with [[records/contacts/sarah]].",
    );
    fx.reindex();

    // Premise guard: if the index ever starts extracting body links, this
    // assertion fails loudly and the backlink predicate can be revisited.
    let standup = fx.p("records/meetings/standup.md");
    let meetings = fx.store.find_by_type("meeting").unwrap();
    let record = meetings
        .into_iter()
        .find(|r| r.path == standup)
        .expect("meeting is catalogued in its sidecar");
    assert!(
        record.links.is_empty(),
        "premise: the body link is NOT projected into the sidecar `links` field; got {:?}",
        record.links
    );

    // The edge is reported regardless of the empty sidecar `links`.
    let sarah = fx.p("records/contacts/sarah.md");
    let linkers = backlinks(&fx.store, &sarah).unwrap();
    assert_eq!(
        paths(&linkers),
        vec!["records/meetings/standup"],
        "a body-only wiki-link must register as a backlink"
    );
}
1440
#[test]
fn backlinks_finds_edge_in_typed_frontmatter_field() {
    // The only link to acme sits in sarah's typed `company:` frontmatter key;
    // her body has none. Such a link is a real edge, so it must surface as a
    // backlink of acme even though it is not under the `links:` key.
    let fx = Fixture::new();
    fx.write("records/companies/acme.md", "company", "Acme", "");
    let sarah_raw = "---\ntype: contact\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Sarah\ncompany: [[records/companies/acme]]\n---\nBody with no links.\n";
    fx.write_raw("records/contacts/sarah.md", sarah_raw);
    fx.reindex();

    let acme = fx.p("records/companies/acme.md");
    let linkers = backlinks(&fx.store, &acme).unwrap();
    assert_eq!(
        paths(&linkers),
        vec!["records/contacts/sarah"],
        "a wiki-link in a typed frontmatter field is an incoming edge"
    );
}
1460
#[test]
fn backlinks_unscoped_scans_the_tree_not_only_the_sidecar() {
    // REGRESSION (loop budget): the UNSCOPED call is expected to resolve
    // incoming edges with a single embedded-ripgrep pass over the tree
    // (`Store::find_links_to`) rather than enumerating sidecar candidates and
    // `read_to_string`-confirming each one (O(store), the documented >3x
    // budget miss). The tree, not the sidecar, is that scan's ground truth,
    // which gives an observable signal: a linker that exists on disk but is in
    // no sidecar (stale / never-built index) is still found. A regression to
    // the sidecar-bounded confirm loop would miss that linker and fail here.
    let fx = Fixture::new();
    fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
    fx.write(
        "records/profiles/indexed.md",
        "profile",
        "Indexed",
        "[[records/contacts/sarah]]",
    );
    // Sidecars now cover sarah and the indexed linker only.
    fx.reindex();

    // Written AFTER the reindex and never indexed: on disk, in no sidecar.
    fx.write(
        "records/profiles/unindexed.md",
        "profile",
        "Unindexed",
        "[[records/contacts/sarah]]",
    );

    let sarah = fx.p("records/contacts/sarah.md");
    let linkers = backlinks(&fx.store, &sarah).unwrap();
    let expected = vec!["records/profiles/indexed", "records/profiles/unindexed"];
    assert_eq!(
        paths(&linkers),
        expected,
        "unscoped backlinks ripgrep-scans the tree, so the on-disk-but-unindexed \
         linker is found too — not only the sidecar-catalogued one"
    );
}
1501
#[test]
fn backlinks_scoped_candidates_come_from_the_sidecar_not_a_tree_walk() {
    // REGRESSION (scale contract): the SCOPED form (`--type` / `--in`) is the
    // I/O-scoped path. Its candidates are enumerated from the relevant
    // `index.jsonl` sidecars and only those files are parsed; the tree is not
    // walked. That makes the scope an I/O scope rather than a result filter,
    // with the mirror-image consequence of the unscoped tree scan: a linker
    // that is on disk but missing from the sidecar (stale / never-built index)
    // is NOT discovered by the scoped call.
    let fx = Fixture::new();
    fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
    fx.write(
        "records/profiles/indexed.md",
        "profile",
        "Indexed",
        "[[records/contacts/sarah]]",
    );
    // Sidecars now cover sarah and the indexed linker only.
    fx.reindex();

    // A second profile linker, written after the reindex: on disk, in no
    // sidecar.
    fx.write(
        "records/profiles/unindexed.md",
        "profile",
        "Unindexed",
        "[[records/contacts/sarah]]",
    );

    // Scoped to `profile`, only the catalogued linker is a candidate; the
    // unindexed one stays invisible.
    let sarah = fx.p("records/contacts/sarah.md");
    let linkers =
        backlinks_filtered(&fx.store, &sarah, &["profile".to_string()], None).unwrap();
    assert_eq!(
        paths(&linkers),
        vec!["records/profiles/indexed"],
        "scoped backlinks reads the sidecar candidate set; the on-disk-but-unindexed \
         linker is not tree-walked"
    );
}
1548
#[test]
fn backlinks_filtered_type_scopes_the_candidate_set() {
    // Two pages link to sarah, a `meeting` and a `profile`. With the type
    // filter set to `meeting`, only the meeting may come back.
    let fx = Fixture::new();
    let fixtures = [
        ("records/contacts/sarah.md", "contact", "Sarah", ""),
        ("records/meetings/m1.md", "meeting", "Call", "[[records/contacts/sarah]]"),
        ("records/profiles/bio.md", "profile", "Bio", "[[records/contacts/sarah]]"),
    ];
    for (file, kind, summary, body) in fixtures {
        fx.write(file, kind, summary, body);
    }
    fx.reindex();

    let sarah = fx.p("records/contacts/sarah.md");

    let meetings_only =
        backlinks_filtered(&fx.store, &sarah, &["meeting".to_string()], None).unwrap();
    assert_eq!(
        paths(&meetings_only),
        vec!["records/meetings/m1"],
        "--type meeting must exclude the profile linker"
    );

    // Control: the unfiltered call returns both linkers, so it was the filter
    // (not missing data) that removed the profile above.
    let everything = backlinks(&fx.store, &sarah).unwrap();
    let expected_all = vec!["records/meetings/m1", "records/profiles/bio"];
    assert_eq!(paths(&everything), expected_all);
}
1591
#[test]
fn backlinks_filtered_layer_scopes_the_candidate_set() {
    // The two linkers sit in different layers (a `records` meeting and a
    // `sources` email), so a layer scope must cleanly separate them.
    let fx = Fixture::new();
    let fixtures = [
        ("records/contacts/sarah.md", "contact", "Sarah", ""),
        ("records/meetings/m1.md", "meeting", "Call", "[[records/contacts/sarah]]"),
        ("sources/emails/intro.md", "email", "Intro", "[[records/contacts/sarah]]"),
    ];
    for (file, kind, summary, body) in fixtures {
        fx.write(file, kind, summary, body);
    }
    fx.reindex();

    let sarah = fx.p("records/contacts/sarah.md");

    // Scoped to the sources layer: only the email.
    let from_sources =
        backlinks_filtered(&fx.store, &sarah, &[], Some(Layer::Sources)).unwrap();
    assert_eq!(
        paths(&from_sources),
        vec!["sources/emails/intro"],
        "--in sources must keep only the sources-layer linker"
    );

    // Scoped to the records layer: only the meeting.
    let from_records =
        backlinks_filtered(&fx.store, &sarah, &[], Some(Layer::Records)).unwrap();
    assert_eq!(paths(&from_records), vec!["records/meetings/m1"]);
}
1635
#[test]
fn backlinks_scoped_type_spans_all_topic_folders_in_its_layer() {
    // REGRESSION (finding #12): the type, not the folder, is authoritative. A
    // `profile` normally lives in the canonical `records/profiles/` folder,
    // but an agent may file one under any other `records/<folder>/`. The
    // scoped candidate set therefore has to cover the whole `records/` layer
    // and filter by type. Before the fix, `find_by_type("profile")` read ONLY
    // `records/profiles/index.jsonl` whenever that sidecar existed, so every
    // profile linker filed elsewhere was silently dropped and
    // `backlinks --type profile` under-reported dependents (a wrong
    // blast-radius check).
    //
    // Triggering the bug takes BOTH a populated `records/profiles/` (so the
    // canonical sidecar exists) AND a profile elsewhere in the layer that
    // links the target. The earlier
    // `backlinks_scoped_candidates_come_from_the_sidecar_not_a_tree_walk` test
    // cannot catch it: every profile in that fixture sits in the canonical
    // `records/profiles/` folder, so reading only that sidecar finds them all.
    let fx = Fixture::new();
    fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
    // Canonical-folder profile with NO link to the target; it exists solely so
    // that `records/profiles/index.jsonl` is present on disk.
    fx.write(
        "records/profiles/glossary.md",
        "profile",
        "Glossary",
        "No link to sarah here.",
    );
    // Off-canonical profile that DOES link the target.
    fx.write(
        "records/people/sarah.md",
        "profile",
        "Sarah bio",
        "Profile of [[records/contacts/sarah]].",
    );
    // Builds both records/profiles/index.jsonl and records/people/index.jsonl.
    fx.reindex();

    let sarah = fx.p("records/contacts/sarah.md");

    // Scoped to `profile`: the off-canonical linker must be found. Pre-fix the
    // candidates came from `records/profiles/` alone and this was empty.
    let profile_only = ["profile".to_string()];
    let scoped = backlinks_filtered(&fx.store, &sarah, &profile_only, None).unwrap();
    assert_eq!(
        paths(&scoped),
        vec!["records/people/sarah"],
        "a profile filed outside records/profiles/ must still be a scoped backlink"
    );

    // Cross-check against the unscoped tree scan: it reports the same single
    // linker, so the scoped result neither over- nor under-counts.
    let unscoped = backlinks(&fx.store, &sarah).unwrap();
    assert_eq!(
        paths(&unscoped),
        vec!["records/people/sarah"],
        "scoped and unscoped backlinks must agree on the edge set"
    );
}
1699
1700    // ── neighborhood ─────────────────────────────────────────────────────────
1701
#[test]
fn neighborhood_hops_zero_is_empty() {
    // With a hop budget of 0 nothing is reached, even though the seed has an
    // outgoing link. The slice still names the seed, in bare form.
    let fx = Fixture::new();
    fx.write(
        "records/profiles/a.md",
        "profile",
        "A",
        "[[records/profiles/b]]",
    );
    fx.write("records/profiles/b.md", "profile", "B", "");

    let seed = fx.p("records/profiles/a.md");
    let slice = neighborhood(&fx.store, &seed, 0, &[], Direction::Both).unwrap();

    assert_eq!(slice.seed, fx.p("records/profiles/a"));
    assert!(slice.nodes.is_empty());
}
1723
#[test]
fn neighborhood_outgoing_one_hop_reads_summary_and_type() {
    // One outgoing hop from `a` reaches `b`; the resulting node must carry
    // b's summary and type, its hop distance, and the edge it was reached by.
    let fx = Fixture::new();
    fx.write(
        "records/profiles/a.md",
        "profile",
        "Person A",
        "Knows [[records/contacts/b]].",
    );
    fx.write("records/contacts/b.md", "contact", "Contact B summary", "");

    let seed = fx.p("records/profiles/a.md");
    let slice = neighborhood(&fx.store, &seed, 1, &[], Direction::Outgoing).unwrap();
    assert_eq!(slice.nodes.len(), 1);

    let node = &slice.nodes[0];
    assert_eq!(node.path, fx.p("records/contacts/b"));
    assert_eq!(node.summary, "Contact B summary");
    assert_eq!(node.type_.as_deref(), Some("contact"));
    assert_eq!(node.hops, 1);

    let expected_via = Some((fx.p("records/profiles/a"), Direction::Outgoing));
    assert_eq!(node.via, expected_via);
}
1753
#[test]
fn neighborhood_incoming_only_walks_backlinks() {
    // Edges in play: a -> seed (incoming to seed) and seed -> c (outgoing
    // from seed). Walking Incoming only must surface `a` and ignore `c`.
    let fx = Fixture::new();
    let fixtures = [
        ("records/profiles/seed.md", "profile", "Seed", "Out to [[records/profiles/c]]."),
        ("records/profiles/a.md", "profile", "A", "In to [[records/profiles/seed]]."),
        ("records/profiles/c.md", "profile", "C", ""),
    ];
    for (file, kind, summary, body) in fixtures {
        fx.write(file, kind, summary, body);
    }
    fx.reindex();

    let seed = fx.p("records/profiles/seed.md");
    let slice = neighborhood(&fx.store, &seed, 1, &[], Direction::Incoming).unwrap();

    // Only the page that links TO the seed is reached.
    let reached: Vec<_> = slice.nodes.iter().map(|n| n.path.clone()).collect();
    assert_eq!(paths(&reached), vec!["records/profiles/a"]);

    let expected_via = Some((fx.p("records/profiles/seed"), Direction::Incoming));
    assert_eq!(slice.nodes[0].via, expected_via);
}
1796
#[test]
fn neighborhood_bounded_bfs_respects_hop_limit_and_min_distance() {
    // Outgoing chain a -> b -> c -> d. A budget of 2 hops must reach b (at 1)
    // and c (at 2) and stop short of d (which would be 3).
    let fx = Fixture::new();
    let chain = [
        ("records/c/a.md", "A", "[[records/c/b]]"),
        ("records/c/b.md", "B", "[[records/c/c]]"),
        ("records/c/c.md", "C", "[[records/c/d]]"),
        ("records/c/d.md", "D", ""),
    ];
    for (file, summary, body) in chain {
        fx.write(file, "concept", summary, body);
    }

    let seed = fx.p("records/c/a.md");
    let slice = neighborhood(&fx.store, &seed, 2, &[], Direction::Outgoing).unwrap();

    // Index the reported hop distance by node path.
    let mut hops_by_path: HashMap<String, u32> = HashMap::new();
    for node in &slice.nodes {
        hops_by_path.insert(node.path.to_string_lossy().into_owned(), node.hops);
    }

    assert_eq!(hops_by_path.get("records/c/b").copied(), Some(1));
    assert_eq!(hops_by_path.get("records/c/c").copied(), Some(2));
    assert_eq!(hops_by_path.get("records/c/d"), None);
    assert_eq!(slice.nodes.len(), 2);
}
1824
#[test]
fn neighborhood_records_min_hops_on_diamond() {
    // Diamond a -> {b, c} -> d. `d` is reachable through either branch, both
    // times at distance 2; it must be reported exactly once, with hops == 2.
    let fx = Fixture::new();
    let diamond = [
        ("records/d/a.md", "A", "[[records/d/b]] [[records/d/c]]"),
        ("records/d/b.md", "B", "[[records/d/d]]"),
        ("records/d/c.md", "C", "[[records/d/d]]"),
        ("records/d/d.md", "D", ""),
    ];
    for (file, summary, body) in diamond {
        fx.write(file, "concept", summary, body);
    }

    let seed = fx.p("records/d/a.md");
    let slice = neighborhood(&fx.store, &seed, 3, &[], Direction::Outgoing).unwrap();

    let sink = fx.p("records/d/d");
    let d_nodes: Vec<&ContextNode> = slice.nodes.iter().filter(|n| n.path == sink).collect();
    assert_eq!(d_nodes.len(), 1, "d must appear exactly once");
    assert_eq!(d_nodes[0].hops, 2, "d's min distance from a is 2");

    // b and c (hop 1) plus d (hop 2): three nodes, no duplicate for d.
    assert_eq!(slice.nodes.len(), 3);
}
1857
#[test]
fn neighborhood_type_filter_narrows_results_but_not_traversal() {
    // Chain seed -> contact -> meeting. With the filter set to `meeting`, the
    // contact is left out of the RESULT but must still be walked THROUGH, so
    // the meeting is reported at hop 2.
    let fx = Fixture::new();
    let fixtures = [
        ("records/profiles/seed.md", "profile", "Seed", "[[records/contacts/sarah]]"),
        ("records/contacts/sarah.md", "contact", "Sarah", "[[records/meetings/m1]]"),
        ("records/meetings/m1.md", "meeting", "Renewal call", ""),
    ];
    for (file, kind, summary, body) in fixtures {
        fx.write(file, kind, summary, body);
    }

    let wanted_types = vec!["meeting".to_string()];
    let seed = fx.p("records/profiles/seed.md");
    let slice = neighborhood(&fx.store, &seed, 2, &wanted_types, Direction::Outgoing).unwrap();

    // Exactly one node survives the filter: the meeting.
    assert_eq!(slice.nodes.len(), 1);
    let meeting = &slice.nodes[0];
    assert_eq!(meeting.path, fx.p("records/meetings/m1"));
    assert_eq!(meeting.type_.as_deref(), Some("meeting"));
    assert_eq!(meeting.hops, 2);
}
1891
#[test]
fn neighborhood_capped_bounds_traversal_not_just_output() {
    // REGRESSION (finding #16): `neighborhood` expands every node it reaches,
    // and each incoming-edge expansion is a full-store scan, so the cost is
    // O(visited × store). The CLI's `--limit` used to be a post-hoc `.take(n)`
    // on the RESULT: fewer nodes printed, but every reachable node was still
    // scanned. `neighborhood_capped` limits the traversal itself: once
    // `max_nodes` distinct nodes have been admitted, the BFS stops discovering
    // (and therefore stops scanning).
    //
    // Fixture shaped so that a bounded traversal is observable:
    //   seed -> a, b, c   (hop 1, discovered in sorted order: a, b, c)
    //   a    -> deep      (hop 2, reachable ONLY by expanding `a`)
    // With a cap of 2, `a` and `b` are admitted and the walk stops before `c`
    // and before any hop-2 expansion, so `deep` is never reached. A post-hoc
    // `.take(2)` would have walked the whole graph first; the absence of
    // `deep` below is what shows the traversal itself stopped.
    let fx = Fixture::new();
    let fixtures = [
        ("records/n/seed.md", "Seed", "[[records/n/a]] [[records/n/b]] [[records/n/c]]"),
        ("records/n/a.md", "A", "[[records/n/deep]]"),
        ("records/n/b.md", "B", ""),
        ("records/n/c.md", "C", ""),
        ("records/n/deep.md", "Deep", ""),
    ];
    for (file, summary, body) in fixtures {
        fx.write(file, "concept", summary, body);
    }
    let seed = fx.p("records/n/seed.md");

    // Baseline, no cap, 3 hops: a, b, c at hop 1 and deep at hop 2. This is
    // the full set the cap is measured against.
    let full = neighborhood(&fx.store, &seed, 3, &[], Direction::Outgoing).unwrap();
    let full_paths: Vec<_> = full.nodes.iter().map(|n| n.path.clone()).collect();
    let expected_full = vec![
        "records/n/a",
        "records/n/b",
        "records/n/c",
        "records/n/deep",
    ];
    assert_eq!(
        paths(&full_paths),
        expected_full,
        "uncapped traversal reaches every node within the hop budget"
    );

    // Same hop budget, capped at 2: only the first two hop-1 nodes, and
    // crucially NOT `deep`, because the BFS halted before expanding into
    // hop 2.
    let capped =
        neighborhood_capped(&fx.store, &seed, 3, &[], Direction::Outgoing, Some(2)).unwrap();
    let capped_paths: Vec<_> = capped.nodes.iter().map(|n| n.path.clone()).collect();
    assert_eq!(
        paths(&capped_paths),
        vec!["records/n/a", "records/n/b"],
        "the cap bounds traversal: only the first 2 nodes are reached, and the \
         hop-2 `deep` node (reachable only by expanding a capped-out node) is \
         never traversed"
    );

    // A `None` cap must behave like the plain, unbounded `neighborhood`.
    let uncapped =
        neighborhood_capped(&fx.store, &seed, 3, &[], Direction::Outgoing, None).unwrap();
    assert_eq!(
        uncapped.nodes.len(),
        full.nodes.len(),
        "None cap matches the unbounded neighborhood result"
    );
}
1990
#[test]
fn neighborhood_capped_both_direction_caps_the_node_count() {
    // The CLI always passes `Direction::Both` (the per-node backlinks scan is
    // the expensive path the cap exists to bound). The cap applies to
    // discovery in any direction, so a hub with many incoming links is
    // bounded as well.
    let fx = Fixture::new();
    fx.write("records/profiles/hub.md", "profile", "Hub", "");
    // Five pages, each linking to the hub.
    for n in ["a", "b", "c", "d", "e"] {
        let linker_file = format!("records/profiles/{n}.md");
        fx.write(&linker_file, "profile", n, "[[records/profiles/hub]]");
    }
    fx.reindex();

    let hub = fx.p("records/profiles/hub.md");

    let capped =
        neighborhood_capped(&fx.store, &hub, 1, &[], Direction::Both, Some(3)).unwrap();
    assert_eq!(
        capped.nodes.len(),
        3,
        "Both-direction neighborhood is bounded to the node cap"
    );

    // Control: without a cap the same query returns all five linkers, so it
    // was the cap (not the data) that limited the set above.
    let uncapped = neighborhood(&fx.store, &hub, 1, &[], Direction::Both).unwrap();
    assert_eq!(uncapped.nodes.len(), 5);
}
2035
#[test]
fn neighborhood_cycle_terminates() {
    // a <-> b link to each other. Even with a generous hop budget the walk
    // must finish, and each page must show up at most once.
    let fx = Fixture::new();
    fx.write("records/g/a.md", "concept", "A", "[[records/g/b]]");
    fx.write("records/g/b.md", "concept", "B", "[[records/g/a]]");
    fx.reindex();

    let seed = fx.p("records/g/a.md");
    let slice = neighborhood(&fx.store, &seed, 10, &[], Direction::Both).unwrap();

    // Seen from a, the only other node is b (the seed itself is excluded).
    let reached: Vec<_> = slice.nodes.iter().map(|n| n.path.clone()).collect();
    assert_eq!(paths(&reached), vec!["records/g/b"]);
}
2057
2058    // ── orphans ──────────────────────────────────────────────────────────────
2059
#[test]
fn orphans_finds_files_with_no_edges_either_direction() {
    // An orphan has no edge in either direction. Of the three files below,
    // only the email qualifies.
    let fx = Fixture::new();
    let fixtures = [
        // Wired pair: a -> b gives `a` an outgoing edge and `b` an incoming one.
        ("records/profiles/a.md", "profile", "A", "[[records/profiles/b]]"),
        ("records/profiles/b.md", "profile", "B", ""),
        // No links in, no links out.
        ("sources/emails/lonely.md", "email", "Lonely email", "Just text, no links."),
    ];
    for (file, kind, summary, body) in fixtures {
        fx.write(file, kind, summary, body);
    }

    let got = orphans(&fx.store, None).unwrap();
    let expected = vec!["sources/emails/lonely.md"];
    assert_eq!(paths(&got), expected);
}
2081
#[test]
fn orphans_file_with_only_broken_outgoing_link_is_orphan() {
    // A link whose target does not exist is a validation issue, not an edge
    // to another store file. A page whose only link is broken therefore still
    // counts as an orphan.
    let fx = Fixture::new();
    let page = "records/profiles/a.md";
    fx.write(page, "profile", "A", "[[records/contacts/ghost]]");

    let got = orphans(&fx.store, None).unwrap();
    let listed = paths(&got);
    assert!(
        listed.iter().any(|p| p == page),
        "broken outgoing links must not wire the graph: {got:?}"
    );
}
2099
#[test]
fn orphans_file_with_only_incoming_is_not_orphan() {
    // One edge, linker -> target. The target has no outgoing links of its own
    // but is linked to; the linker has an outgoing link. Neither is an orphan.
    let fx = Fixture::new();
    let fixtures = [
        ("records/contacts/target.md", "contact", "Target", ""),
        ("records/profiles/linker.md", "profile", "Linker", "[[records/contacts/target]]"),
    ];
    for (file, kind, summary, body) in fixtures {
        fx.write(file, kind, summary, body);
    }

    let got = orphans(&fx.store, None).unwrap();

    // Incoming-only end of the edge.
    assert!(
        !paths(&got).contains(&"records/contacts/target.md".to_string()),
        "incoming-only is not an orphan: {got:?}"
    );
    // Outgoing end of the edge.
    assert!(!paths(&got).contains(&"records/profiles/linker.md".to_string()));
}
2119
2120    #[test]
2121    fn orphans_incoming_link_from_other_layer_unorphans() {
2122        let fx = Fixture::new();
2123        // Candidate in records/, only incoming edge comes from sources/ — a
2124        // cross-layer link must still un-orphan it even when scoped to records.
2125        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
2126        fx.write(
2127            "sources/emails/sarah.md",
2128            "email",
2129            "bio",
2130            "[[records/contacts/sarah]]",
2131        );
2132        // A genuine orphan in records/ to prove the scope still returns something.
2133        fx.write("records/contacts/nemo.md", "contact", "Nemo", "");
2134        let got = orphans(&fx.store, Some(Layer::Records)).unwrap();
2135        assert_eq!(paths(&got), vec!["records/contacts/nemo.md"]);
2136    }
2137
2138    #[test]
2139    fn orphans_layer_scope_filters_candidates() {
2140        let fx = Fixture::new();
2141        // Orphans across both layers: one source, and two records (an atomic
2142        // contact + a conclusion `profile`, the former wiki-page).
2143        fx.write("sources/emails/s.md", "email", "S", "no links");
2144        fx.write("records/contacts/r.md", "contact", "R", "");
2145        fx.write("records/profiles/w.md", "profile", "W", "");
2146        // The records scope keeps only the two records-layer orphans.
2147        let only_records = orphans(&fx.store, Some(Layer::Records)).unwrap();
2148        assert_eq!(
2149            paths(&only_records),
2150            vec!["records/contacts/r.md", "records/profiles/w.md"]
2151        );
2152        let only_sources = orphans(&fx.store, Some(Layer::Sources)).unwrap();
2153        assert_eq!(paths(&only_sources), vec!["sources/emails/s.md"]);
2154        // No scope: all three, sorted (records, records, sources).
2155        let all = orphans(&fx.store, None).unwrap();
2156        assert_eq!(
2157            paths(&all),
2158            vec![
2159                "records/contacts/r.md",
2160                "records/profiles/w.md",
2161                "sources/emails/s.md",
2162            ]
2163        );
2164    }
2165
2166    #[test]
2167    fn orphans_self_link_does_not_count_as_an_edge() {
2168        let fx = Fixture::new();
2169        // A page that only links to itself has no real edges => still an orphan.
2170        fx.write(
2171            "records/synthesis/solo.md",
2172            "synthesis",
2173            "Solo",
2174            "I reference [[records/synthesis/solo]] only.",
2175        );
2176        let got = orphans(&fx.store, None).unwrap();
2177        assert_eq!(paths(&got), vec!["records/synthesis/solo.md"]);
2178    }
2179
2180    #[test]
2181    fn orphans_excludes_index_and_db_files() {
2182        let fx = Fixture::new();
2183        // A lone index.md / DB.md must never be reported as an orphan content file.
2184        fx.write_raw(
2185            "records/index.md",
2186            "---\ntype: index\nscope: layer\nfolder: records\n---\n# records\n",
2187        );
2188        fx.write(
2189            "records/profiles/real-orphan.md",
2190            "profile",
2191            "Real",
2192            "no links",
2193        );
2194        let got = orphans(&fx.store, None).unwrap();
2195        assert_eq!(paths(&got), vec!["records/profiles/real-orphan.md"]);
2196    }
2197
2198    // ── frontmatter_block helper ─────────────────────────────────────────────
2199
2200    #[test]
2201    fn frontmatter_block_extracts_between_fences() {
2202        let text = "---\ntype: contact\nsummary: hi\n---\nbody here\n";
2203        assert_eq!(
2204            frontmatter_block(text),
2205            Some("type: contact\nsummary: hi\n")
2206        );
2207    }
2208
2209    #[test]
2210    fn frontmatter_block_none_without_leading_fence() {
2211        let text = "no frontmatter here\n";
2212        assert_eq!(frontmatter_block(text), None);
2213    }
2214
2215    #[test]
2216    fn frontmatter_block_tolerates_leading_bom() {
2217        // Regression (finding #19 cross-module): a UTF-8 BOM before the opening
2218        // fence must not hide the frontmatter from the graph layer — otherwise a
2219        // BOM-prefixed file the catalog indexes contributes no backlinks/edges.
2220        // Pre-fix the `---\n` strip failed on the BOM and returned None.
2221        let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody here\n";
2222        assert_eq!(
2223            frontmatter_block(text),
2224            Some("type: contact\nsummary: hi\n"),
2225            "a leading BOM must not hide frontmatter from the graph layer"
2226        );
2227    }
2228
2229    // ── shared edge notion: whitespace / fence / case / containment ──────────
2230
2231    /// Padded `[[ x ]]` must be a forward edge AND (after reindex) a backward
2232    /// edge — the two views agreeing on the same edge in a clean store.
2233    #[test]
2234    fn padded_link_is_both_a_forward_and_backward_edge() {
2235        let fx = Fixture::new();
2236        fx.write(
2237            "records/contacts/sarah.md",
2238            "contact",
2239            "Sarah",
2240            "the contact",
2241        );
2242        fx.write(
2243            "records/profiles/a.md",
2244            "profile",
2245            "A",
2246            "See [[ records/contacts/sarah ]] today.",
2247        );
2248        fx.reindex();
2249
2250        assert_eq!(
2251            paths(&forwardlinks(&fx.store, Path::new("records/profiles/a.md")).unwrap()),
2252            vec!["records/contacts/sarah"],
2253            "padded link is a forward edge"
2254        );
2255        assert_eq!(
2256            paths(&backlinks(&fx.store, Path::new("records/contacts/sarah.md")).unwrap()),
2257            vec!["records/profiles/a"],
2258            "padded link is the SAME backward edge (forward and backward agree)"
2259        );
2260    }
2261
2262    /// A `[[...]]` only inside a fenced code block is a documentation example,
2263    /// not an edge: no forward edge, no backward edge, and the source page is an
2264    /// orphan (no real links). Matches validate's fence-aware extractor.
2265    #[test]
2266    fn fenced_link_is_not_an_edge_and_page_is_orphan() {
2267        let fx = Fixture::new();
2268        fx.write(
2269            "records/contacts/sarah.md",
2270            "contact",
2271            "Sarah",
2272            "the contact",
2273        );
2274        fx.write(
2275            "records/synthesis/howto.md",
2276            "synthesis",
2277            "Howto",
2278            "```markdown\n[[records/contacts/sarah]] is how you link.\n```",
2279        );
2280        fx.reindex();
2281
2282        assert!(
2283            forwardlinks(&fx.store, Path::new("records/synthesis/howto.md"))
2284                .unwrap()
2285                .is_empty(),
2286            "a fenced example is not a forward edge"
2287        );
2288        assert!(
2289            backlinks(&fx.store, Path::new("records/contacts/sarah.md"))
2290                .unwrap()
2291                .is_empty(),
2292            "a fenced example is not a backward edge"
2293        );
2294        let orphan_set = paths(&orphans(&fx.store, None).unwrap());
2295        assert!(
2296            orphan_set.contains(&"records/synthesis/howto.md".to_string()),
2297            "a page whose only link is fenced has no real edges => orphan: {orphan_set:?}"
2298        );
2299    }
2300
2301    /// `rename` must NOT rewrite a `[[...]]` inside a fenced code block (it is
2302    /// verbatim documentation, not an edge), while still rewriting a real link.
2303    #[test]
2304    fn rewrite_links_to_leaves_fenced_examples_untouched() {
2305        let input = "\
2306Real [[records/contacts/sarah]] link.
2307
2308```markdown
2309Example: [[records/contacts/sarah]] inside a fence.
2310```
2311
2312Trailing [[records/contacts/sarah]].
2313";
2314        let got = rewrite_links_to(
2315            input,
2316            Path::new("records/contacts/sarah"),
2317            Path::new("records/contacts/sarah-chen"),
2318        );
2319        // The two non-fenced links retarget; the fenced one is verbatim.
2320        assert!(
2321            got.contains("Real [[records/contacts/sarah-chen]] link."),
2322            "real link before the fence must retarget"
2323        );
2324        assert!(
2325            got.contains("Trailing [[records/contacts/sarah-chen]]."),
2326            "real link after the fence must retarget"
2327        );
2328        assert!(
2329            got.contains("Example: [[records/contacts/sarah]] inside a fence."),
2330            "fenced example must stay verbatim, got:\n{got}"
2331        );
2332    }
2333
2334    /// `rewrite_links_to` matches a padded link and preserves the display.
2335    #[test]
2336    fn rewrite_links_to_matches_padded_link() {
2337        let got = rewrite_links_to(
2338            "See [[ records/contacts/sarah |Sarah]] today.",
2339            Path::new("records/contacts/sarah"),
2340            Path::new("records/contacts/sarah-chen"),
2341        );
2342        assert_eq!(got, "See [[records/contacts/sarah-chen|Sarah]] today.");
2343    }
2344
2345    /// On a case-insensitive filesystem a case-variant link is the same edge:
2346    /// backlinks finds it, orphans does NOT falsely orphan the target, and
2347    /// rename rewrites it. On a case-sensitive FS the link is genuinely a
2348    /// different target, so the test is skipped.
2349    #[cfg(unix)]
2350    #[test]
2351    fn case_variant_link_is_one_edge_on_case_insensitive_fs() {
2352        // Probe the filesystem the same way the production code does
2353        // (`link_edge_key` is imported at module scope).
2354        if link_edge_key("A") != link_edge_key("a") {
2355            // case-sensitive filesystem: the case-variant link is a different
2356            // target, so this scenario doesn't apply.
2357            return;
2358        }
2359        let fx = Fixture::new();
2360        fx.write(
2361            "records/contacts/sarah-chen.md",
2362            "contact",
2363            "Sarah",
2364            "the contact",
2365        );
2366        fx.write(
2367            "records/profiles/bio.md",
2368            "profile",
2369            "Bio",
2370            "See [[records/contacts/Sarah-Chen]].",
2371        );
2372        fx.reindex();
2373
2374        assert_eq!(
2375            paths(&backlinks(&fx.store, Path::new("records/contacts/sarah-chen.md")).unwrap()),
2376            vec!["records/profiles/bio"],
2377            "case-variant incoming link must be a backward edge"
2378        );
2379        let orphan_set = paths(&orphans(&fx.store, None).unwrap());
2380        assert!(
2381            !orphan_set.contains(&"records/contacts/sarah-chen.md".to_string()),
2382            "a target with a live case-variant incoming link must NOT be orphaned: {orphan_set:?}"
2383        );
2384
2385        let rewritten = rewrite_links_to(
2386            "See [[records/contacts/Sarah-Chen]].",
2387            Path::new("records/contacts/sarah-chen"),
2388            Path::new("records/contacts/sarah"),
2389        );
2390        assert_eq!(
2391            rewritten, "See [[records/contacts/sarah]].",
2392            "rename must rewrite the case-variant link on a case-insensitive FS"
2393        );
2394    }
2395
2396    /// A `[[../outside/x]]` escaping wiki-link is never a forward edge, and a
2397    /// `neighborhood` from the escaping page never reads or traverses through the
2398    /// external file — closing the disclosure vector.
2399    #[cfg(unix)]
2400    #[test]
2401    fn escaping_link_is_not_an_edge_and_neighborhood_does_not_escape() {
2402        let fx = Fixture::new();
2403        // An external file OUTSIDE the store root, with its own in-store link.
2404        let outside_dir = fx.store.root.parent().unwrap().join("outside");
2405        fs::create_dir_all(&outside_dir).unwrap();
2406        fs::write(
2407            outside_dir.join("secret.md"),
2408            "---\ntype: note\nsummary: TOPSECRET\n---\nLinks [[records/contacts/sarah]].\n",
2409        )
2410        .unwrap();
2411        fx.write(
2412            "records/contacts/sarah.md",
2413            "contact",
2414            "Sarah",
2415            "the contact",
2416        );
2417        fx.write(
2418            "records/concepts/traversal.md",
2419            "concept",
2420            "Traversal",
2421            "See [[../outside/secret]].",
2422        );
2423        fx.reindex();
2424
2425        // The escaping target is not a forward edge.
2426        assert!(
2427            forwardlinks(&fx.store, Path::new("records/concepts/traversal.md"))
2428                .unwrap()
2429                .is_empty(),
2430            "an escaping `[[../outside/secret]]` must not be a forward edge"
2431        );
2432
2433        // Neighborhood from the escaping page reaches nothing through the
2434        // external file (the external file is never read/traversed).
2435        let slice = neighborhood(
2436            &fx.store,
2437            Path::new("records/concepts/traversal.md"),
2438            2,
2439            &[],
2440            Direction::Outgoing,
2441        )
2442        .unwrap();
2443        assert!(
2444            slice
2445                .nodes
2446                .iter()
2447                .all(|n| !n.path.to_string_lossy().contains("outside")),
2448            "neighborhood must not read/traverse the external file: {:?}",
2449            slice.nodes
2450        );
2451    }
2452
2453    #[test]
2454    fn regression_non_utf8_linker_edges_survive_scoped_backlinks_and_orphans() {
2455        // Adversarial review #10: a content file with a stray non-UTF8 byte beside
2456        // a valid ASCII `[[...]]` line must still expose its edges. The unscoped
2457        // backlink scanner reads bytes lossily, but `forwardlinks`/`orphans` used
2458        // `read_to_string` and dropped EVERY edge on `InvalidData` — so scoped
2459        // backlinks under-reported vs unscoped, and `orphans` flagged BOTH
2460        // endpoints of a live edge.
2461        let fx = Fixture::new();
2462        fx.write("records/contacts/sarah.md", "contact", "Sarah", "# Sarah");
2463        // bio.md: valid UTF-8 frontmatter, but a BODY line with a 0xE9 byte
2464        // (Latin-1 'é', invalid as standalone UTF-8) beside the link to sarah.
2465        let mut bytes: Vec<u8> = Vec::new();
2466        bytes.extend_from_slice(
2467            b"---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Bio\n---\n",
2468        );
2469        bytes.extend_from_slice(b"See [[records/contacts/sarah]] caf");
2470        bytes.push(0xE9);
2471        bytes.extend_from_slice(b"\n");
2472        let bio_abs = fx.store.root.join("records/profiles/bio.md");
2473        fs::create_dir_all(bio_abs.parent().unwrap()).unwrap();
2474        fs::write(&bio_abs, &bytes).unwrap();
2475        fx.reindex();
2476
2477        let sarah = fx.p("records/contacts/sarah");
2478
2479        // forwardlinks reads the non-UTF8 file and still finds the edge.
2480        let fwd = paths(&forwardlinks(&fx.store, &fx.p("records/profiles/bio")).unwrap());
2481        assert!(
2482            fwd.iter().any(|p| p.contains("sarah")),
2483            "forwardlinks must extract the edge from a non-UTF8 file: {fwd:?}"
2484        );
2485
2486        // Scoped backlinks (rides `forwardlinks`) must AGREE with unscoped.
2487        let unscoped = paths(&backlinks(&fx.store, &sarah).unwrap());
2488        let scoped =
2489            paths(&backlinks_filtered(&fx.store, &sarah, &["profile".to_string()], None).unwrap());
2490        assert!(
2491            unscoped.iter().any(|p| p.contains("bio")),
2492            "unscoped backlinks must include bio: {unscoped:?}"
2493        );
2494        assert!(
2495            scoped.iter().any(|p| p.contains("bio")),
2496            "scoped backlinks must agree with unscoped on the non-UTF8 linker: {scoped:?}"
2497        );
2498
2499        // Neither endpoint of the live edge may be reported as an orphan.
2500        let orph = paths(&orphans(&fx.store, None).unwrap());
2501        assert!(
2502            !orph
2503                .iter()
2504                .any(|p| p.contains("bio") || p.contains("sarah")),
2505            "neither endpoint of a live edge may be an orphan: {orph:?}"
2506        );
2507    }
2508}