Skip to main content

dbmd_core/
graph.rs

1//! `graph` — the wiki-link **relationship layer**.
2//!
3//! Wiki-links are curated-relevance edges (the LLM wrote them), so the graph's
4//! job is to **assemble the relevant context around a seed**, not to be
5//! analyzed. **All ops are on-demand — there is no maintained graph** (a
6//! persistent graph is the roadmap engine).
7//!
8//! [`backlinks`] / [`forwardlinks`] are loop ops (O(changed), never O(store)).
9//! [`neighborhood`] is the high-value context-hydration op. [`orphans`] is a
10//! SWEEP curation worklist.
11//!
12//! Whole-graph analytics (connected components, cycle detection, shortest
13//! path, sinks/sources, DOT/JSON export) are deliberately **not** here — a
14//! human studying the graph opens the store in Obsidian; broken-link detection
15//! is [`crate::validate`]'s job (`WIKI_LINK_BROKEN`).
16//!
17//! ## Implementation note — two paths for the incoming-edge scan
18//!
19//! The scale contract (SPEC § Tooling, plan: *"the interactive loop is
20//! O(changed), never O(store)"*) is the load-bearing rule here. [`backlinks`]
21//! is a loop op, so it must **not** open and `read_to_string` every content file
22//! in the store on each call. It resolves incoming edges by one of two paths,
23//! chosen by whether the call is scoped:
24//!
25//! - **Unscoped** (`dbmd graph backlinks <x>`, no `--type`/`--in`): one
26//!   embedded-ripgrep pass for the literal `[[<target>]]` over the tree, via
27//!   [`Store::find_links_to`] (`grep` + `ignore`, early-exit per file) — the
28//!   same scan engine [`crate::validate`]'s working-set incoming-linker step
29//!   uses. A single store traversal with cheap presence-only matching, not N
30//!   whole-file parses; that is what keeps the unscoped call inside the loop
31//!   budget. [`backlinks`] then filters the raw hits to content files and emits
32//!   canonical bare targets (its relationship view), where the lower-level
33//!   [`Store::find_links_to`] returns every `.md` the text appears in.
//! - **Scoped** (`--type` / `--in`): the candidate set is enumerated from
//!   `index.jsonl` sidecars via [`Store::sidecar_records`] — the named layer's
//!   sidecars when `--in` is given, store-wide sidecars otherwise — filtered to
//!   the requested `--type`(s), and each candidate is confirmed by a single-file
//!   parse. That is what makes `--in` an *I/O* scope, not just a result filter: a
//!   layer-scoped `backlinks` reads only that layer's sidecars
//!   (O(entities-in-layer)) and parses only those files. A type-only scope reads
//!   store-wide because a type's records can span several folders within its
//!   layer (a `profile` filed under any `records/<folder>/`, not only its
//!   canonical `records/profiles/`) and a loose file of the type may be filed at
//!   the *other* layer's root — reading only one canonical folder or layer would
//!   silently drop those linkers.
45//!
46//! **Why the scoped path confirms by parsing the candidate, not by trusting the
47//! sidecar's `links` field.** A sidecar record's `links` is the file's
48//! *frontmatter* `links:` list only — it does **not** capture wiki-links written
49//! in the body or inside other typed frontmatter fields (`company: [[…]]`,
50//! `attendees: [ … ]`, `derived_from: [ … ]`). [`forwardlinks`] extracts edges
51//! from the whole file, so to keep the two directions on the **same** edge set
52//! (an incoming edge to X is exactly: some file whose [`forwardlinks`] contains
53//! X) the incoming-edge confirmation re-parses each candidate file the same way.
54//! The sidecar bounds *which* files are candidates; the parse decides whether
55//! each truly links. The unscoped ripgrep path stays on that same edge set by
56//! matching the link text wherever it lives in the file (frontmatter or body).
57//! A node's `summary` / `type` likewise read frontmatter directly (the source of
58//! truth the sidecar is derived from; never stale).
59
60use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
61use std::path::{Path, PathBuf};
62
63use ignore::WalkBuilder;
64
65use crate::index::IndexRecord;
66use crate::store::{
67    canonical_link_target, ensure_path_within_store, extract_edge_targets, fence_closes,
68    fence_opens, link_edge_key, Layer, Store, StoreError,
69};
70
/// Which edge directions a traversal follows.
///
/// Passed to [`neighborhood`] / [`neighborhood_capped`] to select the edges the
/// BFS walks, and recorded on [`ContextNode::via`] to say which kind of edge
/// brought a node into the slice. The traversal only ever records `Incoming` or
/// `Outgoing` on a node; `Both` is a request-side selector.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Direction {
    /// Incoming edges only: files that link *to* the current node ([`backlinks`]).
    Incoming,
    /// Outgoing edges only: targets the current node links to ([`forwardlinks`]).
    Outgoing,
    /// Both directions: the union of the incoming and outgoing edges.
    Both,
}
81
/// One node reached during a [`neighborhood`] hydration: the file, its
/// `summary`, and how it connects back toward the seed.
///
/// Built by [`neighborhood_capped`] the first time its BFS reaches a file, so
/// `hops` and `via` describe the first route found from the seed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContextNode {
    /// The store-relative path of the reached file, in the form [`backlinks`] /
    /// [`forwardlinks`] emit it (documented there as a bare wiki-link path with
    /// no `.md`).
    pub path: PathBuf,
    /// The file's `summary`, obtained via `read_summary_and_type` when the node
    /// is first reached. NOTE(review): the module docs say `summary` / `type` are
    /// read from frontmatter directly rather than from the sidecar — confirm
    /// against that helper.
    pub summary: String,
    /// The file's `type`, when known (`None` otherwise). With a non-empty type
    /// filter, a node whose type is unknown is not emitted.
    pub type_: Option<String>,
    /// Hop distance from the seed. Emitted nodes start at 1; 0 would be the seed
    /// itself, which is never emitted as a node.
    pub hops: u32,
    /// The relationship edge that brought this node into the slice: the path it
    /// links to/from one hop closer to the seed, and the direction that edge was
    /// traversed in. Slices built by [`neighborhood_capped`] always set this to
    /// `Some`, with a direction of `Incoming` or `Outgoing`.
    pub via: Option<(PathBuf, Direction)>,
}
98
/// The readable working-set digest [`neighborhood`] returns: the seed plus the
/// reached nodes with their summaries and connections. The relationship-axis
/// "turn a seed into context" primitive.
///
/// An empty `nodes` list is a valid result (for example `hops == 0`, or a seed
/// with no edges in the requested direction).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContextSlice {
    /// The seed the slice was hydrated from, after target normalization (the
    /// same normalization applied to every node path).
    pub seed: PathBuf,
    /// The reached nodes (excluding the seed), in BFS discovery order — closest
    /// hop first. When a type filter was supplied, only on-type nodes appear.
    pub nodes: Vec<ContextNode>,
}
109
110/// Incoming edges to `path`: files that wiki-link to it. The blast-radius /
111/// dependents primitive before an edit. Store-wide (every layer / every type);
112/// see [`backlinks_filtered`] for the `--type` / `--in`-scoped form.
113///
114/// `path` is the store-relative target as it would be written inside a
115/// wiki-link (with or without a trailing `.md`; both resolve to the same
116/// target). Returns each linking file as its **canonical bare wiki-link path**
117/// (store-relative, no `.md`) — the same key [`forwardlinks`] emits, so the two
118/// directions round-trip and [`neighborhood`] can use one node identity.
119/// Deduped, sorted, never including the seed itself.
120pub fn backlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
121    backlinks_filtered(store, path, &[], None)
122}
123
124/// Incoming edges to `path`, scoped by the linking file's `type` and/or layer —
125/// the `dbmd graph backlinks --type/--in` surface.
126///
127/// **Scale (the loop contract).** Two paths, by whether the call is scoped:
128///
129/// - **Unscoped** (`types` empty *and* `layer` `None`): one embedded-ripgrep
130///   pass for `[[<target>]]` across the store via [`Store::find_links_to`] — a
131///   single `grep` + `ignore` traversal with early-exit per file, never a
132///   `read_to_string` of every content file. This is the same scan engine
133///   [`crate::validate::validate_working_set`]'s incoming-linker step rides, and
134///   it keeps the unscoped call inside the loop budget (the old per-candidate
135///   confirm-read re-opened every file in the store → O(store)).
136/// - **Scoped** (`types` and/or `layer` set): the candidate set — the files that
137///   *might* link to `path` — is read from `index.jsonl` sidecars (never a
138///   content-tree walk). With a `--in <layer>` the read touches only that layer:
139///   O(entities-in-layer), the sanctioned loop cost. A type-only scope (no `--in`)
140///   reads store-wide sidecars and filters by `type`, exactly as
141///   [`crate::query::Query::execute`] does — so a record of the type filed under a
142///   non-canonical folder of its layer (a `profile` under any `records/<folder>/`)
143///   *and* a **loose file** of the type filed at the *other* layer's root (a `note`
144///   filed directly under `records/`, catalogued in `records/index.jsonl`) are both
145///   candidates. Each candidate is then confirmed by a single-file parse.
146///
147/// **Correctness (one edge set, both paths).** An incoming edge to X is exactly:
148/// some file whose [`forwardlinks`] contains X — a wiki-link in the body or in
149/// *any* frontmatter field (`company: [[…]]`, `attendees: [ … ]`), not just the
150/// sidecar's frontmatter `links:` projection. Both paths honor that:
151/// - The unscoped scan matches the literal `[[<target>]]` text wherever it lives
152///   in a file (frontmatter or body), the same edges [`forwardlinks`] extracts.
153///   [`Store::find_links_to`] returns *every* `.md` carrying the link text
154///   (including `index.md` catalogs); [`backlinks`] is the relationship view, so
155///   the results are filtered to content files ([`is_content_rel`]) and emitted
156///   as canonical bare targets, self-excluded.
157/// - The scoped path confirms each candidate via [`file_links_to`], which
158///   delegates to [`forwardlinks`] (body + every frontmatter field) — so a
159///   body-only or typed-field edge is caught, not just the sidecar's `links:`
160///   list.
161///
162/// Result form (canonical bare paths, deduped, sorted, seed excluded) is
163/// identical on both paths and matches [`backlinks`].
164pub fn backlinks_filtered(
165    store: &Store,
166    path: &Path,
167    types: &[String],
168    layer: Option<Layer>,
169) -> Result<Vec<PathBuf>, StoreError> {
170    let target = normalize_target(path);
171    if target.is_empty() {
172        return Ok(Vec::new());
173    }
174    let target_key = edge_key(&target);
175
176    // Unscoped: one content pass over the store (O(store) scan with early-exit
177    // per file), not a per-candidate read of every content file. `find_links_to`
178    // returns every `.md` carrying an edge to the target (incl. catalog
179    // `index.md`); narrow to content files and canonicalize to the bare target
180    // form `backlinks` emits, dropping the seed's self-link.
181    if types.is_empty() && layer.is_none() {
182        let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
183        for rel in store.find_links_to(path)? {
184            if !is_content_rel(&rel) {
185                continue;
186            }
187            let linker = normalize_target(&rel);
188            if linker.is_empty() || edge_key(&linker) == target_key {
189                // A file never counts as its own backlink (case-folded so a
190                // case-variant self-link is still excluded).
191                continue;
192            }
193            hits.insert(PathBuf::from(linker));
194        }
195        return Ok(hits.into_iter().collect());
196    }
197
198    // Scoped: read only the named folder(s)' sidecars for the candidate set, then
199    // confirm each candidate with a single-file parse — O(folder), the I/O scope
200    // `--type` / `--in` buys.
201    let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
202    for candidate in candidate_records(store, types, layer)? {
203        let rel = &candidate.path;
204        let candidate_target = normalize_target(rel);
205        if candidate_target.is_empty() || edge_key(&candidate_target) == target_key {
206            // A file never counts as its own backlink.
207            continue;
208        }
209        // Confirm the edge by parsing the candidate file the same way
210        // forwardlinks does (body + all frontmatter), so body/typed-field links
211        // are caught — the sidecar's `links` field alone would miss them.
212        if file_links_to(store, rel, &target)? {
213            hits.insert(PathBuf::from(candidate_target));
214        }
215    }
216
217    Ok(hits.into_iter().collect())
218}
219
220/// Outgoing edges from `path`: the wiki-link targets extracted from that single
221/// file. Loop-fast; follow the evidence chain.
222///
223/// `path` is the store-relative path of the file to read. Targets are returned
224/// as store-relative paths (bare, no `.md`), deduped and sorted; the file's
225/// links to itself are dropped. A missing file yields an empty list (a
226/// dangling seed has no outgoing edges to report — broken-link detection is
227/// [`crate::validate`]'s job).
228pub fn forwardlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
229    let self_key = edge_key(&normalize_target(path));
230    let abs = match resolve_existing(store, path) {
231        Some(a) => a,
232        None => return Ok(Vec::new()),
233    };
234    // Decode the body LOSSILY (bytes -> `from_utf8_lossy`): wiki-link syntax
235    // (`[[...]]`) is ASCII, so a non-UTF8 byte elsewhere on a line cannot hide an
236    // edge. This mirrors the unscoped backlink scanner
237    // ([`Store::find_links_to_any`], which reads bytes + lossy by design) so
238    // SCOPED backlinks (which ride `forwardlinks`) agree with unscoped backlinks
239    // on a Latin-1-imported file instead of silently dropping its edges — a
240    // `read_to_string` that errored on `InvalidData` returned NO edges.
241    let body = match std::fs::read(&abs) {
242        Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
243        Err(e) => return Err(StoreError::Io(e)),
244    };
245
246    let mut out: BTreeSet<PathBuf> = BTreeSet::new();
247    for target in extract_link_targets(&body) {
248        // Self-link drop is case-folded so a case-variant self-reference is also
249        // excluded on a case-insensitive filesystem.
250        if target.is_empty() || edge_key(&target) == self_key {
251            continue;
252        }
253        out.insert(PathBuf::from(target));
254    }
255    Ok(out.into_iter().collect())
256}
257
258/// The candidate set for an incoming-edge scan: the sidecar records that could
259/// link to the target, read from the `index.jsonl` sidecars (never a content-tree
260/// walk). `types`/`layer` narrow *which* sidecars are read — the I/O scope that
261/// keeps a typed/layer backlinks O(entities-in-layer) when a layer is named.
262///
263/// - `types` non-empty, `layer` given: read **only that layer's** sidecars
264///   (O(entities-in-layer)) and keep the records whose `type` is in `types`. The
265///   read is *not* short-circuited on a layer that disagrees with a type's
266///   canonical layer, because a record of that type may legitimately be filed
267///   there as a **loose file** (a `note` filed directly at `records/`, catalogued
268///   in `records/index.jsonl`); the `type` filter on the layer read is what keeps
269///   the result correct in either case.
270/// - `types` non-empty, `layer` `None`: read **store-wide** sidecars and keep the
271///   records whose `type` is in `types` — exactly what [`crate::query::Query::execute`]
272///   does for a type-only query. This is complete across every folder *and* every
273///   layer the type is filed under: its canonical-layer records (the common case)
274///   plus any loose file of that type filed at the *other* layer's root.
275/// - `types` empty: every sidecar record under `layer` (or store-wide when
276///   `None`) via [`Store::sidecar_records`].
277///
278/// **Why store-wide (not the type's one canonical layer) for the type-only case.**
279/// [`layer_for_type`](crate::store::layer_for_type) maps a type to exactly ONE
280/// layer (`note` → Sources, `contact`
281/// → Records), but a loose file (SPEC § Loose files) may legitimately be filed at
282/// the *other* layer's root and catalogued in that layer's `index.jsonl`. Reading
283/// only `layer_for_type(T)` would silently drop a records-loose `note` from
284/// `backlinks --type note`, and early-`continue`-ing on `--in records` (because
285/// `records` ≠ `layer_for_type(note)`) would return empty — diverging from the
286/// unscoped scan, from `--type T --in <layer>`, and from `dbmd query --type T`.
287/// Reading store-wide (or the named layer) and filtering by `type` is sidecar-backed
288/// (no content-tree walk) and keeps the scoped edge set equal to the unscoped one.
289/// A `type` can also span several folders within one layer — a conclusion `profile`
290/// filed under any `records/<folder>/`, not only `records/profiles/` — and the
291/// store-wide/layer read covers that too.
292fn candidate_records(
293    store: &Store,
294    types: &[String],
295    layer: Option<Layer>,
296) -> Result<Vec<IndexRecord>, StoreError> {
297    if types.is_empty() {
298        return store.sidecar_records(layer);
299    }
300    let want: HashSet<&str> = types.iter().map(|s| s.as_str()).collect();
301    // A layer scope reads only that layer's sidecars (O(entities-in-layer)); with
302    // no layer, read store-wide so a loose file of the type filed at *either*
303    // layer's root is covered — matching `Query::execute`'s type-only candidate
304    // set. The `type` filter (not a per-type canonical-layer guess) is what makes
305    // both correct, so a loose `note` under `records/` is found and a `note` under
306    // `sources/` is excluded when `--in records`.
307    let mut by_path: std::collections::BTreeMap<PathBuf, IndexRecord> =
308        std::collections::BTreeMap::new();
309    for rec in store.sidecar_records(layer)? {
310        if want.contains(rec.type_.as_str()) {
311            by_path.insert(rec.path.clone(), rec);
312        }
313    }
314    Ok(by_path.into_values().collect())
315}
316
317/// True if the store file at `rel` carries a wiki-link whose canonical target
318/// equals `target`. Delegates to [`forwardlinks`] so the incoming-edge predicate
319/// is *exactly* the outgoing-edge extraction — body + every frontmatter field —
320/// keeping the two directions on one edge set. `forwardlinks` already emits
321/// canonical bare targets, so `target` (likewise normalized by the caller) is
322/// compared directly. A missing/binary file links to nothing.
323fn file_links_to(store: &Store, rel: &Path, target: &str) -> Result<bool, StoreError> {
324    let edges = forwardlinks(store, rel)?;
325    let target_key = edge_key(target);
326    // Compare on the case-folded edge key so a case-variant link (e.g.
327    // `[[records/contacts/Sarah-Chen]]` to `sarah-chen.md`) is confirmed on a
328    // case-insensitive filesystem, agreeing with the unscoped scan and validate.
329    Ok(edges
330        .iter()
331        .any(|e| edge_key(&e.to_string_lossy()) == target_key))
332}
333
334/// **Context hydration.** Bounded BFS from `seed` over backlinks + forwardlinks
335/// out to `hops`, reading each reached file's `summary` + relationship, and
336/// returning a readable [`ContextSlice`]. Optionally filtered by `types` and
337/// `direction`. On-demand; no maintained graph. What the agent reaches for to
338/// assemble a working set in one call.
339///
340/// Traversal semantics:
341/// - **`hops`** bounds true graph distance from the seed. `hops == 0` returns
342///   an empty slice (the seed alone is no context).
343/// - **`direction`** selects which edges are followed: `Incoming` walks
344///   backlinks, `Outgoing` walks forwardlinks, `Both` walks the union.
345/// - **`types`**, when non-empty, filters which reached nodes appear in the
346///   slice — but traversal still passes *through* off-type nodes, so a
347///   `meeting` two hops out is still reachable through a `contact` even when
348///   filtering to `meeting`. (An empty `types` slice imposes no filter.)
349/// - Each node records the lowest hop count at which it is first reached (BFS
350///   order); the seed is never included as a node.
351///
352/// Unbounded traversal: delegates to [`neighborhood_capped`] with no node cap, so
353/// it expands every reachable node within `hops`. For a densely-interlinked store
354/// this is one full-store backlinks scan **per reached node** (O(visited × store))
355/// — prefer [`neighborhood_capped`] with a `max_nodes` cap to bound that work.
356pub fn neighborhood(
357    store: &Store,
358    seed: &Path,
359    hops: u32,
360    types: &[String],
361    direction: Direction,
362) -> Result<ContextSlice, StoreError> {
363    neighborhood_capped(store, seed, hops, types, direction, None)
364}
365
366/// [`neighborhood`] with a hard cap on how many nodes the BFS **traverses**.
367///
368/// `max_nodes` bounds the *traversal*, not just the result: each node the BFS
369/// expands triggers a per-node incoming-edge scan (an unscoped [`backlinks`] is a
370/// full-store ripgrep pass), so an uncapped neighborhood of a hub node costs
371/// O(visited × store). A post-hoc `.take(n)` on the returned nodes caps the
372/// *output* but not that work — the scans still run for every reached node. This
373/// cap stops discovering (and therefore stops scanning) once `max_nodes` distinct
374/// non-seed nodes have entered the BFS, so the expensive per-node scans are bounded
375/// to at most `max_nodes` of them. `None` is unbounded (the [`neighborhood`]
376/// behavior).
377///
378/// The cap is applied at *discovery* in BFS order, so the kept nodes are exactly
379/// the first `max_nodes` reached (closest-first by hop), and each still records its
380/// true minimum hop distance. Type-filtered (off-type) nodes count against the cap
381/// because the BFS must still traverse *through* them to reach deeper on-type
382/// nodes — the scan cost is paid when a node is expanded, on- or off-type alike.
383pub fn neighborhood_capped(
384    store: &Store,
385    seed: &Path,
386    hops: u32,
387    types: &[String],
388    direction: Direction,
389    max_nodes: Option<usize>,
390) -> Result<ContextSlice, StoreError> {
391    let seed_rel = PathBuf::from(normalize_target(seed));
392    let type_filter: HashSet<&str> = types.iter().map(|s| s.as_str()).collect();
393
394    // `discovered` guards against revisiting a node (and against re-adding the
395    // seed). BFS by levels so the first time we reach a node is its true min
396    // hop distance.
397    let mut discovered: HashSet<PathBuf> = HashSet::new();
398    discovered.insert(seed_rel.clone());
399
400    let mut nodes: Vec<ContextNode> = Vec::new();
401    let mut frontier: VecDeque<PathBuf> = VecDeque::new();
402    frontier.push_back(seed_rel.clone());
403
404    // Count of distinct non-seed nodes admitted to the BFS. Once it hits
405    // `max_nodes` we stop discovering new nodes, which stops enqueuing them, which
406    // stops the per-node full-store backlinks scan they would have triggered — the
407    // cap bounds the *traversal cost*, not only the printed result.
408    let mut admitted = 0usize;
409    let cap_reached = |admitted: usize| max_nodes.is_some_and(|cap| admitted >= cap);
410
411    let mut hop = 0u32;
412    while hop < hops && !frontier.is_empty() && !cap_reached(admitted) {
413        hop += 1;
414        let level_size = frontier.len();
415        for _ in 0..level_size {
416            if cap_reached(admitted) {
417                break;
418            }
419            let current = frontier.pop_front().expect("frontier non-empty");
420
421            // Collect this node's edges in the requested direction(s). Each
422            // edge carries the neighbor path + the direction we traversed it.
423            let mut edges: Vec<(PathBuf, Direction)> = Vec::new();
424            if matches!(direction, Direction::Outgoing | Direction::Both) {
425                for nbr in forwardlinks(store, &current)? {
426                    edges.push((nbr, Direction::Outgoing));
427                }
428            }
429            if matches!(direction, Direction::Incoming | Direction::Both) {
430                for nbr in backlinks(store, &current)? {
431                    edges.push((nbr, Direction::Incoming));
432                }
433            }
434
435            for (neighbor, dir) in edges {
436                if cap_reached(admitted) {
437                    break;
438                }
439                // Drop a neighbor that exists on disk but resolves OUTSIDE the
440                // store via a symlinked path component — it is not a real in-store
441                // edge, exactly as a `..` escape is dropped at edge extraction. This
442                // yields no node (and no traversal through it), closing the
443                // `graph neighborhood` disclosure vector at the graph boundary.
444                if target_escapes_store(store, &neighbor) {
445                    continue;
446                }
447                if !discovered.insert(neighbor.clone()) {
448                    continue;
449                }
450                admitted += 1;
451                let (summary, type_) = read_summary_and_type(store, &neighbor);
452                let include = type_filter.is_empty()
453                    || type_
454                        .as_deref()
455                        .map(|t| type_filter.contains(t))
456                        .unwrap_or(false);
457                if include {
458                    nodes.push(ContextNode {
459                        path: neighbor.clone(),
460                        summary,
461                        type_,
462                        hops: hop,
463                        via: Some((current.clone(), dir)),
464                    });
465                }
466                // Off-type nodes are not emitted but still seed the next BFS
467                // level, so the type filter narrows the *result*, not the
468                // reachable graph.
469                frontier.push_back(neighbor);
470            }
471        }
472    }
473
474    Ok(ContextSlice {
475        seed: seed_rel,
476        nodes,
477    })
478}
479
480/// **SWEEP.** Content files with no incoming AND no outgoing wiki-links — the
481/// curation worklist ("ingested but not yet wired into the wiki"). Off the
482/// loop. Optionally scoped to a layer.
483///
484/// A file is an orphan iff it neither links out to another store file nor is
485/// linked to by one. Incoming edges are counted across the *whole* store
486/// (a link from any layer un-orphans a file), even when `layer` scopes the
487/// candidate set. Returns store-relative paths, sorted.
488pub fn orphans(store: &Store, layer: Option<Layer>) -> Result<Vec<PathBuf>, StoreError> {
489    // One walk of the whole store: for every content file, record (a) whether
490    // it has any outgoing link, and (b) accumulate the set of every target any
491    // file links to (its incoming-edge set). Both come from a single read per
492    // file — the SWEEP cost.
493    let all = walk_content_files(store)?;
494
495    // Every walked content file's edge KEY (NFC-folded, `.md`-stripped). A
496    // wiki-link counts as a live incoming/outgoing edge when it resolves on disk
497    // OR its edge key matches a walked file's. The key match is what makes a
498    // cross-NORMALIZATION link a real edge on a byte-exact filesystem: an NFD
499    // link to an NFC-named file (or vice versa) does NOT satisfy
500    // `resolve_existing`'s `is_file` on Linux (the bytes differ), though it does
501    // on macOS/APFS (which folds NFC/NFD). `link_edge_key` NFC-folds both sides,
502    // so the keys agree on every platform — without this, `orphans` flagged a
503    // live cross-normalization target as an orphan on Linux while macOS hid it.
504    let content_keys: HashSet<String> = all
505        .iter()
506        .filter_map(|abs| rel_path(store, abs))
507        .map(|rel| edge_key(&normalize_target(&rel)))
508        .collect();
509
510    // `linked_to` holds case-folded edge KEYS (not raw paths): the link text may
511    // spell a target with different casing than the on-disk file (e.g.
512    // `[[records/contacts/Sarah-Chen]]` → `sarah-chen.md`), and on a
513    // case-insensitive filesystem that is a real incoming edge. Keying on
514    // `edge_key` so the incoming-edge lookup case-folds is what stops the
515    // false-positive orphan (a file with a live case-variant link reported as
516    // orphaned) — and matches validate, which resolves the same link via the
517    // case-insensitive filesystem.
518    let mut linked_to: HashSet<String> = HashSet::new();
519    let mut has_outgoing: HashMap<PathBuf, bool> = HashMap::new();
520
521    for abs in &all {
522        let rel = match rel_path(store, abs) {
523            Some(r) => r,
524            None => continue,
525        };
526        let self_key = edge_key(&normalize_target(&rel));
527
528        // Lossy decode (see `forwardlinks`): a non-UTF8 byte must not hide a
529        // `[[...]]` edge, or `orphans` would over-report BOTH endpoints of a live
530        // edge as orphans (and `stats` would inflate the orphan count) on a file
531        // with a stray Latin-1 byte beside a valid ASCII link line.
532        let body = match std::fs::read(abs) {
533            Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
534            Err(e) => return Err(StoreError::Io(e)),
535        };
536
537        let mut outgoing = false;
538        for target in extract_link_targets(&body) {
539            if target.is_empty() || edge_key(&target) == self_key {
540                continue;
541            }
542            // A live edge: resolves on disk (handles raw `.eml`/`.pdf` sources and
543            // store containment) OR matches a walked content file by NFC-folded
544            // key (the cross-normalization case `resolve_existing` misses on a
545            // byte-exact filesystem).
546            if resolve_existing(store, Path::new(&target)).is_none()
547                && !content_keys.contains(&edge_key(&target))
548            {
549                continue;
550            }
551            outgoing = true;
552            linked_to.insert(edge_key(&target));
553        }
554        has_outgoing.insert(rel, outgoing);
555    }
556
557    let mut out: BTreeSet<PathBuf> = BTreeSet::new();
558    for abs in &all {
559        let rel = match rel_path(store, abs) {
560            Some(r) => r,
561            None => continue,
562        };
563        if let Some(layer) = layer {
564            if path_layer(&rel) != Some(layer) {
565                continue;
566            }
567        }
568        let outgoing = has_outgoing.get(&rel).copied().unwrap_or(false);
569        let incoming = linked_to.contains(&edge_key(&normalize_target(&rel)));
570        if !outgoing && !incoming {
571            out.insert(rel);
572        }
573    }
574
575    Ok(out.into_iter().collect())
576}
577
578/// **Write-side.** Rewrite every incoming `[[old]]` wiki-link in `text` to
579/// `[[new]]`, preserving any `|display` override and emitting the canonical bare
580/// target (no `.md`). The write-side twin of [`backlinks`]: where `backlinks`
581/// *finds* the files carrying an edge to `old`, this *retargets* that edge to
582/// `new` inside one file's contents.
583///
584/// `old` and `new` are store-relative paths in the wiki-link sense — both are
585/// passed through the same [`normalize_target`] the read side keys on, so the
586/// `.md` and bare spellings of `old` collapse to one target and a match here is
587/// exactly a match [`backlinks`] / [`Store::find_links_to`](crate::Store::find_links_to)
588/// would report. A link is rewritten iff its normalized target equals
589/// `normalize_target(old)`; prefix collisions (`old=a/b` vs `[[a/bc]]`) and
590/// short-form links never match. Returns the rewritten text (identical to the
591/// input when nothing matched), so the caller can cheaply detect a no-op.
592///
593/// Operates on the raw text (not a parser round-trip) so a link in frontmatter
594/// or body is retargeted uniformly and nothing else is reflowed — **except** a
595/// `[[...]]` inside a ``` fenced code block, which is a documentation example,
596/// not an edge: `rename` must NOT mutate fenced verbatim content (validate
597/// treats fenced links as non-edges, so rewriting them silently corrupts the
598/// example and makes rename disagree with validate). Matching is fence-aware,
599/// whitespace-trimmed, and case-folded to the filesystem, the exact edge notion
600/// [`backlinks`]/[`forwardlinks`] use — so rename retargets precisely the edges
601/// those report and nothing else.
602pub fn rewrite_links_to(text: &str, old: &Path, new: &Path) -> String {
603    let old_target = normalize_target(old);
604    let new_target = normalize_target(new);
605    if old_target.is_empty() {
606        // No target to match → never rewrite anything.
607        return text.to_string();
608    }
609    let old_key = edge_key(&old_target);
610
611    let mut out = String::with_capacity(text.len());
612
613    // Split off the leading `---`…`---` frontmatter block exactly like the read
614    // side ([`Store::extract_edge_targets`] via `split_frontmatter_raw`): the
615    // frontmatter is YAML, NOT markdown — it has no code fences, and a `[[…]]`
616    // in any frontmatter field is a real edge. So the frontmatter region is
617    // rewrite-scanned WITHOUT fence tracking, and the body is rewrite-scanned
618    // with a FRESH fence state. Without this boundary reset, a stray ``` / `~~~`
619    // inside a frontmatter block scalar opens a fence that persists into the
620    // body, so every body `[[…]]` is treated as fenced and silently skipped —
621    // leaving a dangling link after rename even though `backlinks`/`forwardlinks`
622    // (which DO reset at this boundary) still report the body edge. Returns
623    // byte offsets so the `---` fence lines and everything else are copied
624    // byte-exact; the only mutation is a matched `[[…]]` retarget.
625    let body_start = match frontmatter_body_split(text) {
626        Some(body_offset) => {
627            // Frontmatter prefix = `0..body_offset` (the opening `---` line, the
628            // YAML, and the closing `---` line). Scan it line-by-line with
629            // rewriting on and NO fence state: the literal `---` fence lines
630            // never match link syntax (rewrite is a no-op on them), and any
631            // real `[[…]]` in a YAML field is retargeted.
632            for line in text[..body_offset].split_inclusive('\n') {
633                rewrite_links_in_line(line, &old_key, &new_target, &mut out);
634            }
635            body_offset
636        }
637        // No leading frontmatter block → the whole text is body.
638        None => 0,
639    };
640
641    // Body scan with a FRESH fence state. Track the fence as a `(byte, run
642    // length)` exactly like validate and `extract_edge_targets` (NOT a bool
643    // toggled on any ``` / ~~~ line). The naive toggle flips mid-block on a
644    // nested/indented/long-run fence, so a fenced example link would be
645    // rewritten — corrupting documentation and making rename disagree with
646    // validate's edge notion.
647    let mut fence: Option<(u8, usize)> = None;
648    // `split_inclusive` keeps each line's trailing `\n`, so copying a chunk
649    // verbatim preserves the original line endings exactly.
650    for line in text[body_start..].split_inclusive('\n') {
651        // The fence rules key on line content without trailing `\r`/`\n`; the
652        // full chunk (line endings intact) is what we copy verbatim.
653        let content = line.trim_end_matches('\n').trim_end_matches('\r');
654        if let Some(f) = fence {
655            // Inside a fenced code block: copy verbatim, never rewrite. Only a
656            // matching closing fence ends the block.
657            if fence_closes(content, f) {
658                fence = None;
659            }
660            out.push_str(line);
661            continue;
662        }
663        if let Some(opened) = fence_opens(content) {
664            fence = Some(opened);
665            out.push_str(line);
666            continue;
667        }
668        rewrite_links_in_line(line, &old_key, &new_target, &mut out);
669    }
670    out
671}
672
/// Locate the start of the body after a leading `---`…`---` frontmatter block.
///
/// Returns the byte offset of the first byte past the closing `---` line
/// (including that line's `\n` when present). Returns `None` when the text does
/// not open with a `---` fence line or the block is never closed; the caller
/// then treats the whole text as body. A single leading UTF-8 BOM and CRLF line
/// endings are tolerated. This is a local mirror of store's
/// `split_frontmatter_raw` boundary detection, kept in graph.rs alongside the
/// `frontmatter_block` mirror. An offset (rather than slices) is returned so
/// [`rewrite_links_to`] can copy both regions byte-exact while scanning them
/// with different fence policies.
fn frontmatter_body_split(text: &str) -> Option<usize> {
    // Skip one leading BOM, matching parser/store/index/validate.
    let without_bom = text.strip_prefix('\u{feff}').unwrap_or(text);
    // The opening fence must be exactly `---` followed by LF or CRLF.
    let after_open = match without_bom.strip_prefix("---\n") {
        Some(rest) => rest,
        None => without_bom.strip_prefix("---\r\n")?,
    };
    // Everything stripped so far (BOM + opening fence line) precedes `after_open`.
    let mut offset = text.len() - after_open.len();
    for line in after_open.split_inclusive('\n') {
        // Advance first: the body begins *after* the closing fence line.
        offset += line.len();
        if line.trim_end_matches(['\r', '\n']) == "---" {
            return Some(offset);
        }
    }
    None
}
708
709/// Rewrite every `[[...]]` on a single (non-fenced) line whose target matches
710/// `old_key`, appending the result to `out`. Preserves any `|display` override
711/// verbatim and emits the canonical bare `new_target`. A `[[...]]` whose target
712/// does not match (a prefix sibling, the short form, an unrelated target) is
713/// copied through untouched.
714fn rewrite_links_in_line(line: &str, old_key: &str, new_target: &str, out: &mut String) {
715    let bytes = line.as_bytes();
716    let mut i = 0usize;
717    let mut last = 0usize;
718    while i + 1 < bytes.len() {
719        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
720            if let Some(close) = line[i + 2..].find("]]") {
721                let inner = &line[i + 2..i + 2 + close];
722                // An embedded newline means this isn't a single-line link.
723                if !inner.contains('\n') {
724                    let (raw_target, display) = match inner.split_once('|') {
725                        Some((t, d)) => (t, Some(d)),
726                        None => (inner, None),
727                    };
728                    let raw_target = raw_target.trim();
729                    // Match on the SAME edge key the read side uses, so `[[old]]`,
730                    // `[[old.md]]`, `[[ ./old ]]`, and (case-insensitive FS)
731                    // `[[Old]]` all retarget while `[[old-jr]]` never does.
732                    if !raw_target.is_empty()
733                        && !raw_target.starts_with('[')
734                        && edge_key(&canonical_link_target(raw_target)) == old_key
735                    {
736                        out.push_str(&line[last..i]);
737                        out.push_str("[[");
738                        out.push_str(new_target);
739                        if let Some(display) = display {
740                            out.push('|');
741                            out.push_str(display);
742                        }
743                        out.push_str("]]");
744                        i = i + 2 + close + 2;
745                        last = i;
746                        continue;
747                    }
748                }
749                // Not a matching link: skip past this `]]` so an inner `[[`
750                // isn't re-scanned, but leave the text for the verbatim copy.
751                i = i + 2 + close + 2;
752                continue;
753            }
754        }
755        i += 1;
756    }
757    out.push_str(&line[last..]);
758}
759
760// ── Private helpers ─────────────────────────────────────────────────────────
761
762/// Normalize a store-relative path into the canonical wiki-link target form:
763/// forward slashes, no leading `./` or `/`, and no trailing `.md`. This is the
764/// canonical (case-PRESERVING) identity used for output and rewrites; edge
765/// *comparisons* go through [`edge_key`] so the `.md`/bare forms AND (on a
766/// case-insensitive filesystem) case-variant spellings of a target unify. The
767/// shared [`canonical_link_target`] is the single definition every db.md
768/// link op keys on.
769fn normalize_target(path: &Path) -> String {
770    canonical_link_target(&path.to_string_lossy())
771}
772
/// The comparison key for an edge: the canonical target case-folded to the
/// filesystem (identity on a case-sensitive FS, lowercased on macOS/Windows), so
/// the string-keyed graph compares agree with the filesystem's case-insensitive
/// `is_file()` resolution. `[[records/contacts/Sarah-Chen]]` and the on-disk
/// `sarah-chen.md` must be the same edge on a case-insensitive filesystem or
/// backlinks/orphans/rename silently disagree with validate.
///
/// `canonical_target` is expected to already be in canonical form (the output of
/// [`normalize_target`] / [`canonical_link_target`]); this function only
/// delegates to the shared [`link_edge_key`].
fn edge_key(canonical_target: &str) -> String {
    // Thin local alias so graph code has one name for "the key edges compare on".
    link_edge_key(canonical_target)
}
782
783/// Extract every wiki-link target from a body, normalized to the canonical
784/// store-relative form. Fence-aware and whitespace-trimmed via the shared
785/// [`extract_edge_targets`] — a `[[...]]` inside a ``` fenced code block is a
786/// documentation example, NOT an edge (matching validate), and `[[ x ]]`
787/// padding resolves identically to `[[x]]`. A target that would escape the store
788/// root (a `..` component) is dropped here too, so an escaping `[[../outside/x]]`
789/// is never reported as a forward edge and never seeds a [`neighborhood`]
790/// traversal out of the store (the disclosure vector validate flags as an
791/// error). Order-preserving; duplicates kept (callers dedup).
792fn extract_link_targets(body: &str) -> Vec<String> {
793    extract_edge_targets(body)
794        .into_iter()
795        .filter(|t| is_within_store_target(t))
796        .collect()
797}
798
/// True if a canonical target names a location inside the store: it has at
/// least one path component and every component is `Normal` — in particular no
/// `..` (`ParentDir`). The canonical form has already stripped any leading `./`
/// or `/`, so a `Normal`-only path is a safe store-relative key; a `..`
/// component is an escape and is rejected, mirroring validate's safe-path guard.
///
/// An empty target is rejected as well. It has zero components, so a bare
/// `all(..)` over them would be vacuously true and report "nothing" as a valid
/// in-store edge. The rewrite path already treats an empty target as a non-edge
/// (see the empty guards in `rewrite_links_to` / `rewrite_links_in_line`), and
/// this keeps the read side consistent with it.
fn is_within_store_target(target: &str) -> bool {
    let mut saw_component = false;
    for component in Path::new(target).components() {
        // Anything other than a plain name (`..`, `.`, a root or prefix) is
        // not a safe store-relative spelling.
        if !matches!(component, std::path::Component::Normal(_)) {
            return false;
        }
        saw_component = true;
    }
    // Zero components (the empty target) names no file at all.
    saw_component
}
808
809/// Resolve the store root + a store-relative path to the absolute on-disk file,
810/// trying the path as written and then with a `.md` extension. `None` if neither
811/// exists **or if the target resolves outside the store root** — a `..`-laden or
812/// symlink-escaping wiki-link must never turn a graph read/traversal into a read
813/// of an arbitrary file outside the store (the `dbmd graph neighborhood`
814/// disclosure vector). Containment is enforced via the shared
815/// [`ensure_path_within_store`] gate, matching validate's safe-path guard.
816fn resolve_existing(store: &Store, store_relative: &Path) -> Option<PathBuf> {
817    let direct = store.root.join(store_relative);
818    if direct.is_file() && resolves_within_store(store, &direct) {
819        return Some(direct);
820    }
821    let normalized = normalize_target(store_relative);
822    let with_md = store.root.join(format!("{normalized}.md"));
823    if with_md.is_file() && resolves_within_store(store, &with_md) {
824        return Some(with_md);
825    }
826    None
827}
828
829/// True if a store-relative wiki-link target exists on disk but **resolves
830/// outside the store** — i.e. some `Normal` component is a symlink redirecting to
831/// an external dir/file (`records/linkdir/secret` through `records/linkdir ->
832/// /external`, or a directly-symlinked `records/aliased.md -> /external/x.md`).
833///
834/// This is the symlink twin of the `..` escape that [`is_within_store_target`]
835/// drops at edge *extraction*: a `..` target is rejected by its spelling, but a
836/// symlink escape is spelled with only `Normal` components and can only be caught
837/// by resolving the path. [`neighborhood_capped`] uses this to drop such a
838/// neighbor from the traversal entirely, so an escaping symlink yields **no node**
839/// (matching the `..` control) rather than a phantom node whose summary/type are
840/// blanked — closing the `graph neighborhood` disclosure vector at the graph
841/// boundary, not only at the file read.
842///
843/// A genuinely *dangling* in-store link (a target that exists nowhere) is **not**
844/// an escape: it does not resolve on disk at all, so this returns `false` and the
845/// dangling target is still surfaced as a node (existing behavior; broken-link
846/// reporting is [`crate::validate`]'s job).
847fn target_escapes_store(store: &Store, store_relative: &Path) -> bool {
848    // Already in-store-resolvable → not an escape.
849    if resolve_existing(store, store_relative).is_some() {
850        return false;
851    }
852    // Not resolvable in-store: is it because it points OUTSIDE (a symlink escape),
853    // or because it does not exist at all (a dangling link)? It escapes iff the
854    // path (as written or with `.md`) exists on disk yet fails containment.
855    let direct = store.root.join(store_relative);
856    if direct.exists() && !resolves_within_store(store, &direct) {
857        return true;
858    }
859    let normalized = normalize_target(store_relative);
860    let with_md = store.root.join(format!("{normalized}.md"));
861    with_md.exists() && !resolves_within_store(store, &with_md)
862}
863
864/// Containment check for a candidate on-disk path. Always routes through the
865/// authoritative, symlink-resolving [`ensure_path_within_store`] gate — the only
866/// thing that can prove an escaping or symlink-redirected path actually stays
867/// inside the store.
868///
869/// There is deliberately **no** "all-`Normal`-components" fast path that returns
870/// `true` without canonicalizing. A `Normal` component is not safe by spelling:
871/// it can itself be a symlink to a directory or file outside the store
872/// (`records/linkdir -> /etc`, or a directly-symlinked `records/aliased.md ->
873/// ../../outside/secret.md`). `store.root.join(rel)` follows that in-store symlink,
874/// `is_file()` succeeds (it follows symlinks), and without canonicalizing the
875/// resolved target the out-of-store file's `summary`/`type` leak into a
876/// `graph neighborhood` slice. `ensure_path_within_store` canonicalizes `abs`
877/// (resolving every symlink in its chain) and confirms the result is under the
878/// canonicalized root, closing that disclosure vector — the same gate the `..`
879/// path already passes through.
880fn resolves_within_store(store: &Store, abs: &Path) -> bool {
881    ensure_path_within_store(&store.root, abs).is_ok()
882}
883
884/// Convert an absolute path under the store root into its store-relative form.
885fn rel_path(store: &Store, abs: &Path) -> Option<PathBuf> {
886    abs.strip_prefix(&store.root).ok().map(|p| p.to_path_buf())
887}
888
889/// Which layer a store-relative path sits in, by its first component.
890fn path_layer(rel: &Path) -> Option<Layer> {
891    let first = rel.components().next()?;
892    match first.as_os_str().to_str()? {
893        "sources" => Some(Layer::Sources),
894        "records" => Some(Layer::Records),
895        _ => None,
896    }
897}
898
899/// True if a store-relative path is a *content* file: under `sources/` or
900/// `records/`, a `.md` file, and not an `index.md`. Meta files
901/// (`DB.md`, `log.md`, `log/…`, sidecars) are excluded.
902fn is_content_rel(rel: &Path) -> bool {
903    if path_layer(rel).is_none() {
904        return false;
905    }
906    match rel.extension().and_then(|e| e.to_str()) {
907        Some("md") => {}
908        _ => return false,
909    }
910    rel.file_name().and_then(|n| n.to_str()) != Some("index.md")
911}
912
913/// Walk every content `.md` file in the store via the **`ignore`** walker
914/// (the ripgrep directory engine). Only the two layer roots
915/// (`sources/`/`records/`) are descended, so `DB.md`, `log.md`, and
916/// `log/` at the store root are structurally never reached; hidden dirs and
917/// per-folder `index.md` sidecars are filtered out ([`is_content_rel`]). Honors
918/// `.gitignore` the way `rg` does. Returns absolute paths. SWEEP-class.
919fn walk_content_files(store: &Store) -> Result<Vec<PathBuf>, StoreError> {
920    let mut out = Vec::new();
921    for layer in Layer::all() {
922        let dir = store.root.join(layer_dir_name(layer));
923        if !dir.is_dir() {
924            continue;
925        }
926        let walker = WalkBuilder::new(&dir)
927            .hidden(true)
928            .git_ignore(true)
929            .git_global(false)
930            .require_git(false)
931            // Follow symlinks so a symlinked `.md` content file or a symlinked
932            // type folder is walked like any other content (consistent with the
933            // store SWEEP walker), rather than silently vanishing from orphans.
934            .follow_links(true)
935            .build();
936        for result in walker {
937            let entry = result.map_err(|e| StoreError::Search {
938                root: store.root.clone(),
939                message: format!("walk failed: {e}"),
940            })?;
941            // A followed symlink entry reports its own type as `is_symlink()`, so
942            // also accept a symlink whose target is a regular file.
943            let is_file = match entry.file_type() {
944                Some(ft) if ft.is_file() => true,
945                Some(ft) if ft.is_symlink() => std::fs::metadata(entry.path())
946                    .map(|m| m.is_file())
947                    .unwrap_or(false),
948                _ => false,
949            };
950            if !is_file {
951                continue;
952            }
953            let abs = entry.into_path();
954            if let Some(rel) = rel_path(store, &abs) {
955                if is_content_rel(&rel) {
956                    out.push(abs);
957                }
958            }
959        }
960    }
961    Ok(out)
962}
963
/// The on-disk folder name for a layer: `"sources"` for [`Layer::Sources`] and
/// `"records"` for [`Layer::Records`]. Mirrors `Layer::dir_name`; kept local
/// so the graph module owns its own copy rather than coupling to that body.
/// These are the same names [`path_layer`] maps back from.
fn layer_dir_name(layer: Layer) -> &'static str {
    // No catch-all arm: adding a `Layer` variant must fail to compile here.
    match layer {
        Layer::Sources => "sources",
        Layer::Records => "records",
    }
}
972
973/// Read a reached node's `summary` and `type` from its frontmatter. A missing
974/// file, missing frontmatter, or unparseable YAML degrades to an empty summary
975/// / unknown type rather than failing the whole hydration — `neighborhood` is
976/// best-effort context assembly, not validation.
977fn read_summary_and_type(store: &Store, rel: &Path) -> (String, Option<String>) {
978    let abs = match resolve_existing(store, rel) {
979        Some(a) => a,
980        None => return (String::new(), None),
981    };
982    // Lossy decode so a node's summary/type still resolve when the file carries
983    // a stray non-UTF8 byte (consistent with the edge readers above).
984    let text = match std::fs::read(&abs) {
985        Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
986        Err(_) => return (String::new(), None),
987    };
988    let yaml = match frontmatter_block(&text) {
989        Some(y) => y,
990        None => return (String::new(), None),
991    };
992    let value: serde_norway::Value = match serde_norway::from_str(yaml) {
993        Ok(v) => v,
994        Err(_) => return (String::new(), None),
995    };
996    let summary = value
997        .get("summary")
998        .and_then(|v| v.as_str())
999        .unwrap_or("")
1000        .to_string();
1001    let type_ = value
1002        .get("type")
1003        .and_then(|v| v.as_str())
1004        .map(|s| s.to_string());
1005    (summary, type_)
1006}
1007
/// Slice out the YAML that sits between the opening and closing `---` fence
/// lines (both exclusive). Returns `None` when the text has no leading
/// frontmatter block — it does not start with a `---` line, or the block is
/// never closed. A single leading UTF-8 BOM and CRLF line endings are tolerated.
/// Local mirror of the parser's split so the graph module stays self-contained.
fn frontmatter_block(text: &str) -> Option<&str> {
    // Skip one leading BOM, matching parser/store/index/validate.
    let text = text.strip_prefix('\u{feff}').unwrap_or(text);
    // The opening fence must be exactly `---` followed by LF or CRLF.
    let after_open = match text.strip_prefix("---\n") {
        Some(rest) => rest,
        None => text.strip_prefix("---\r\n")?,
    };
    // `yaml_len` counts the bytes of every line seen *before* the closing fence.
    let mut yaml_len = 0usize;
    for line in after_open.split_inclusive('\n') {
        if line.trim_end_matches(['\r', '\n']) == "---" {
            return Some(&after_open[..yaml_len]);
        }
        yaml_len += line.len();
    }
    None
}
1028
1029#[cfg(test)]
1030mod tests {
1031    use super::*;
1032    use std::fs;
1033    use tempfile::TempDir;
1034
1035    use crate::parser::Config;
1036
1037    // ── Fixture builder ─────────────────────────────────────────────────────
1038    //
1039    // A real on-disk store in a tempdir. We write actual files (frontmatter +
1040    // wiki-links) and exercise the real code paths. The fixture constructs the
1041    // `Store` by its public fields rather than `Store::open`, so the graph
1042    // tests stand on their own and do not depend on any other module's
1043    // behavior. Each test asserts the behavior the SPEC promises, derived from
1044    // intent, never from echoing the function's own output.
1045    //
1046    // `backlinks` (and `neighborhood` in any incoming direction) enumerate their
1047    // candidate set from the type-folder `index.jsonl` sidecars — the loop
1048    // contract: never a whole-store content walk. A real db.md store maintains
1049    // those sidecars write-through, so a test that exercises backlinks must call
1050    // [`Fixture::reindex`] after writing its files to build them (the SWEEP that
1051    // `dbmd index rebuild` runs). Forwardlinks/orphans read content directly and
1052    // need no sidecar.
1053
    /// A throwaway on-disk store for one test: a temp directory plus the
    /// `Store` rooted at it.
    struct Fixture {
        /// Owns the temp directory; held only so it outlives `store`.
        _tmp: TempDir,
        /// The store under test, rooted at the temp directory's path.
        store: Store,
    }
1058
1059    impl Fixture {
1060        fn new() -> Self {
1061            let tmp = TempDir::new().expect("tempdir");
1062            let root = tmp.path().to_path_buf();
1063            fs::write(root.join("DB.md"), "---\ntype: db-md\n---\n# store\n").expect("DB.md");
1064            let store = Store {
1065                root,
1066                config: Config::default(),
1067            };
1068            Fixture { _tmp: tmp, store }
1069        }
1070
1071        /// Write a content file at a store-relative path with the given type,
1072        /// summary, and body. Creates parent dirs.
1073        fn write(&self, rel: &str, type_: &str, summary: &str, body: &str) {
1074            let abs = self.store.root.join(rel);
1075            fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
1076            let contents = format!(
1077                "---\ntype: {type_}\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: {summary}\n---\n{body}\n"
1078            );
1079            fs::write(&abs, contents).expect("write file");
1080        }
1081
1082        /// Write a raw file verbatim (for frontmatter-shape edge cases).
1083        fn write_raw(&self, rel: &str, contents: &str) {
1084            let abs = self.store.root.join(rel);
1085            fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
1086            fs::write(&abs, contents).expect("write raw");
1087        }
1088
1089        /// Build the type-folder `index.jsonl` sidecars from the content written
1090        /// so far — the state a real store is always in (write-through), and the
1091        /// candidate set `backlinks` reads. Call after writing files in any test
1092        /// that exercises `backlinks` or an incoming-direction `neighborhood`.
1093        fn reindex(&self) {
1094            crate::index::Index::rebuild_all(&self.store).expect("rebuild sidecars");
1095        }
1096
1097        fn p(&self, rel: &str) -> PathBuf {
1098            PathBuf::from(rel)
1099        }
1100    }
1101
1102    fn paths(v: &[PathBuf]) -> Vec<String> {
1103        v.iter()
1104            .map(|p| p.to_string_lossy().replace('\\', "/"))
1105            .collect()
1106    }
1107
1108    // ── normalize_target ────────────────────────────────────────────────────
1109
1110    #[test]
1111    fn normalize_strips_md_and_leading_dotslash() {
1112        assert_eq!(
1113            normalize_target(Path::new("records/contacts/sarah.md")),
1114            "records/contacts/sarah"
1115        );
1116        assert_eq!(
1117            normalize_target(Path::new("./records/profiles/elena")),
1118            "records/profiles/elena"
1119        );
1120        assert_eq!(normalize_target(Path::new("/records/x")), "records/x");
1121        // Bare and `.md` forms must collapse to the same key, or edges won't unify.
1122        assert_eq!(
1123            normalize_target(Path::new("a/b")),
1124            normalize_target(Path::new("a/b.md"))
1125        );
1126    }
1127
1128    // ── extract_link_targets (forwardlinks core) ────────────────────────────
1129
1130    #[test]
1131    fn extract_handles_display_text_and_md_suffix() {
1132        let body = "See [[records/profiles/sarah-chen|Sarah]] and [[records/contacts/elena.md]].";
1133        let got = extract_link_targets(body);
1134        assert_eq!(
1135            got,
1136            vec!["records/profiles/sarah-chen", "records/contacts/elena"]
1137        );
1138    }
1139
1140    #[test]
1141    fn extract_ignores_external_markdown_links() {
1142        // Standard markdown links are NOT wiki-links and must not be extracted
1143        // (SPEC: external refs don't participate in the graph).
1144        let body = "[Acme](https://acme.io) but [[records/companies/acme]] is internal.";
1145        let got = extract_link_targets(body);
1146        assert_eq!(got, vec!["records/companies/acme"]);
1147    }
1148
1149    #[test]
1150    fn extract_display_text_is_not_treated_as_a_target() {
1151        // A `|display` segment that looks path-like must not become a target;
1152        // only the part before `|` is the link target.
1153        let body = "[[records/contacts/sarah|sources/emails/decoy]]";
1154        let got = extract_link_targets(body);
1155        assert_eq!(got, vec!["records/contacts/sarah"]);
1156    }
1157
1158    // ── rewrite_links_to (write-side twin of backlinks) ─────────────────────
1159
1160    #[test]
1161    fn rewrite_plain_link_to_canonical_new_target() {
1162        let got = rewrite_links_to(
1163            "See [[records/contacts/sarah-chen]] today.",
1164            Path::new("records/contacts/sarah-chen"),
1165            Path::new("records/contacts/sarah-chen-acme"),
1166        );
1167        assert_eq!(got, "See [[records/contacts/sarah-chen-acme]] today.");
1168    }
1169
1170    #[test]
1171    fn rewrite_preserves_display_override() {
1172        let got = rewrite_links_to(
1173            "With [[records/contacts/sarah-chen|Sarah]].",
1174            Path::new("records/contacts/sarah-chen"),
1175            Path::new("records/contacts/sarah-chen-acme"),
1176        );
1177        assert_eq!(got, "With [[records/contacts/sarah-chen-acme|Sarah]].");
1178    }
1179
1180    #[test]
1181    fn rewrite_matches_md_suffixed_old_and_emits_bare_new() {
1182        // The `.md` spelling of the old target must match (it normalizes to the
1183        // same key the read side uses), and the new target is emitted bare —
1184        // the writer doctrine validate enforces (`WIKI_LINK_HAS_EXTENSION`).
1185        let got = rewrite_links_to(
1186            "[[records/contacts/sarah-chen.md]]",
1187            Path::new("records/contacts/sarah-chen"),
1188            Path::new("records/contacts/new.md"),
1189        );
1190        assert_eq!(got, "[[records/contacts/new]]");
1191    }
1192
1193    #[test]
1194    fn rewrite_leaves_prefix_collisions_and_short_form_untouched() {
1195        // Boundary correctness, anchored to the SAME normalize_target the read
1196        // side keys on: `records/contacts/sarah-chen` must NOT match the longer
1197        // `[[…-jr]]`, the short-form `[[sarah-chen]]`, or an unrelated target.
1198        let input = "[[records/contacts/sarah-chen-jr]] [[sarah-chen]] [[records/concepts/x]]";
1199        let got = rewrite_links_to(
1200            input,
1201            Path::new("records/contacts/sarah-chen"),
1202            Path::new("records/contacts/new"),
1203        );
1204        assert_eq!(got, input, "no genuine edge to the seed → text unchanged");
1205    }
1206
1207    #[test]
1208    fn rewrite_handles_multiple_occurrences_and_mixed_spellings() {
1209        let got = rewrite_links_to(
1210            "[[records/x]] then [[./records/x]] and [[records/x.md|d]] end",
1211            Path::new("records/x"),
1212            Path::new("records/y"),
1213        );
1214        // All three spellings of the same target retarget; the display survives.
1215        assert_eq!(
1216            got,
1217            "[[records/y]] then [[records/y]] and [[records/y|d]] end"
1218        );
1219    }
1220
1221    #[test]
1222    fn rewrite_retargets_exactly_the_edges_the_core_parser_sees() {
1223        // The load-bearing property of moving the rewrite into core: the write
1224        // side must operate on EXACTLY the edge set the read side recognizes —
1225        // the same `extract_link_targets` / `normalize_target` grammar that
1226        // `forwardlinks` is built on. Anchor the test to that grammar (via
1227        // `forwardlinks` on a real file) rather than re-listing literals, so a
1228        // future divergence between the read parser and the write rewrite fails
1229        // here. (Coupled to `forwardlinks` — the single-file edge extractor —
1230        // not the multi-file `backlinks` traversal, so it tests the grammar, not
1231        // the walk.)
1232        let fx = Fixture::new();
1233        let body = "Met [[records/contacts/sarah.md|Sarah]] and not [[records/contacts/sarah-2]].";
1234        fx.write("records/profiles/bio.md", "profile", "bio", body);
1235
1236        // Read side: the parser sees two outgoing edges, both in canonical bare
1237        // form (the `.md` spelling collapsed). `sarah` is a real edge here.
1238        let edges = forwardlinks(&fx.store, &fx.p("records/profiles/bio.md")).unwrap();
1239        assert_eq!(
1240            paths(&edges),
1241            vec!["records/contacts/sarah", "records/contacts/sarah-2"],
1242            "fixture must contain exactly the two edges this test reasons about"
1243        );
1244
1245        // Write side: rewriting `sarah → sarah-chen` must retarget the edge the
1246        // parser recognized (matching the `.md` spelling), preserve the display,
1247        // and leave the unrelated `sarah-2` edge untouched.
1248        let got = rewrite_links_to(
1249            body,
1250            Path::new("records/contacts/sarah"),
1251            Path::new("records/contacts/sarah-chen"),
1252        );
1253        assert_eq!(
1254            got,
1255            "Met [[records/contacts/sarah-chen|Sarah]] and not [[records/contacts/sarah-2]]."
1256        );
1257
1258        // Cross-check through the parser: the rewritten text's edge set is the
1259        // original with `sarah` swapped for `sarah-chen` — proving the rewrite
1260        // moved exactly one edge, the one the read side keyed on.
1261        fx.write("records/profiles/bio.md", "profile", "bio", &got);
1262        let after = forwardlinks(&fx.store, &fx.p("records/profiles/bio.md")).unwrap();
1263        assert_eq!(
1264            paths(&after),
1265            vec!["records/contacts/sarah-2", "records/contacts/sarah-chen"],
1266            "after rewrite the parser must see the new target and not the old"
1267        );
1268    }
1269
1270    #[test]
1271    fn rewrite_empty_old_target_is_a_no_op() {
1272        // A degenerate `old` (here the empty path, which normalizes to empty)
1273        // must never rewrite anything, mirroring backlinks' empty-target guard.
1274        let input = "[[records/x]] [[]] text"; // one real link plus an empty `[[]]`
1275        let got = rewrite_links_to(input, Path::new(""), Path::new("records/y"));
1276        assert_eq!(got, input); // neither link may be retargeted to `records/y`
1277    }
1278
1279    #[test]
1280    fn rewrite_no_match_returns_input_unchanged() { // no link targets `old` => no-op
1281        let input = "no links, [external](https://x), and [[records/concepts/y]]";
1282        let got = rewrite_links_to(input, Path::new("records/x"), Path::new("records/z"));
1283        assert_eq!(got, input); // the Markdown link and the unrelated wiki-link both survive
1284    }
1285
1286    #[test]
1287    fn rewrite_does_not_corrupt_links_in_nested_or_long_run_fences() {
1288        // Regression for the naive `starts_with("```")/("~~~")` fence toggle in
1289        // the rewriter: a fenced example documenting wiki-link syntax must be
1290        // copied VERBATIM, never retargeted — matching validate's edge notion.
1291        // The fixture uses the standard nested-fence convention (an outer ````
1292        // run wrapping an inner ``` example); the toggle used to flip its bool
1293        // mid-block, so the example link was rewritten (silent doc corruption).
1294        let body = "\
1295Here is how to write a link:
1296
1297````
1298```
1299[[records/contacts/bob]]
1300```
1301still fenced [[records/contacts/bob]]
1302````
1303
1304Real link: [[records/contacts/bob]].
1305";
1306        let got = rewrite_links_to(
1307            body,
1308            Path::new("records/contacts/bob"),
1309            Path::new("records/contacts/robert"),
1310        );
1311        // Both links inside the outer ```` fence stay as written; only the real link retargets.
1312        let expected = "\
1313Here is how to write a link:
1314
1315````
1316```
1317[[records/contacts/bob]]
1318```
1319still fenced [[records/contacts/bob]]
1320````
1321
1322Real link: [[records/contacts/robert]].
1323";
1324        assert_eq!(
1325            got, expected,
1326            "fenced example links must survive a rename verbatim; only live edges retarget"
1327        );
1328    }
1329
1330    #[test]
1331    fn rewrite_frontmatter_fence_does_not_swallow_body_link() {
1332        // Regression for the frontmatter/body fence-boundary data-loss bug. A
1333        // stray ``` inside a frontmatter YAML block scalar (`note: |` below) used
1334        // to open a code fence that persisted into the body, so the rewriter
1335        // treated every body `[[…]]` as fenced and skipped it — leaving a dangling
1336        // link after rename even though `backlinks`/`forwardlinks` (which reset
1337        // fence state at the frontmatter boundary) still report the body edge.
1338        // The write side must split the frontmatter off and scan the body with a
1339        // FRESH fence state, exactly like the read side, so both sides agree.
1340        let fx = Fixture::new();
1341        let text = "\
1342---
1343type: meeting
1344created: 2026-05-27T08:00:00-07:00
1345updated: 2026-05-27T08:00:00-07:00
1346summary: Notes
1347note: |
1348  fence with no close:
1349  ```
1350---
1351Met with [[records/contacts/sarah-chen]] yesterday.
1352";
1353        fx.write_raw("records/meeting.md", text);
1354
1355        // Step 1 — read side: despite the stray fence in frontmatter, the body edge
1356        // is a live forward edge (fence state resets at the frontmatter boundary).
1357        let edges = forwardlinks(&fx.store, &fx.p("records/meeting.md")).unwrap();
1358        assert_eq!(
1359            paths(&edges),
1360            vec!["records/contacts/sarah-chen"],
1361            "read side must report the body edge despite the frontmatter fence"
1362        );
1363
1364        // Step 2 — write side: rename must retarget that exact body edge, not skip
1365        // it as fenced. Output is byte-exact everywhere else (frontmatter verbatim,
1366        // including the stray ```).
1367        let got = rewrite_links_to(
1368            text,
1369            Path::new("records/contacts/sarah-chen"),
1370            Path::new("records/contacts/sc2"),
1371        );
1372        let expected = "\
1373---
1374type: meeting
1375created: 2026-05-27T08:00:00-07:00
1376updated: 2026-05-27T08:00:00-07:00
1377summary: Notes
1378note: |
1379  fence with no close:
1380  ```
1381---
1382Met with [[records/contacts/sc2]] yesterday.
1383";
1384        assert_eq!(
1385            got, expected,
1386            "the body link the read side reports must be rewritten; frontmatter copied verbatim"
1387        );
1388
1389        // Step 3 — cross-check through the parser: write the rewritten text back
1390        // and confirm the read side sees the new target and no trace of the old.
1391        fx.write_raw("records/meeting.md", &got);
1392        let after = forwardlinks(&fx.store, &fx.p("records/meeting.md")).unwrap();
1393        assert_eq!(
1394            paths(&after),
1395            vec!["records/contacts/sc2"],
1396            "after rename the read side must report only the retargeted edge"
1397        );
1398    }
1399
1400    #[test]
1401    fn rewrite_link_genuinely_inside_a_body_fence_is_left_untouched() {
1402        // Companion to `rewrite_frontmatter_fence_does_not_swallow_body_link`: the
1403        // frontmatter-boundary reset must not over-correct. A `[[…]]` truly inside
1404        // a BODY code fence is a documentation example, NOT an edge (matching the
1405        // read side), and must survive rename verbatim — the body keeps real fence state.
1406        let fx = Fixture::new();
1407        let text = "\
1408---
1409type: meeting
1410created: 2026-05-27T08:00:00-07:00
1411updated: 2026-05-27T08:00:00-07:00
1412summary: Notes
1413---
1414Real link: [[records/contacts/sarah-chen]].
1415
1416```
1417Example: [[records/contacts/sarah-chen]]
1418```
1419";
1420        fx.write_raw("records/meeting.md", text);
1421
1422        // Read side: only the unfenced body link is an edge; the fenced one is not.
1423        let edges = forwardlinks(&fx.store, &fx.p("records/meeting.md")).unwrap();
1424        assert_eq!(
1425            paths(&edges),
1426            vec!["records/contacts/sarah-chen"],
1427            "only the unfenced body link is a live edge"
1428        );
1429
1430        // Write side: the real link retargets; the fenced example is byte-exact.
1431        let got = rewrite_links_to(
1432            text,
1433            Path::new("records/contacts/sarah-chen"),
1434            Path::new("records/contacts/sc2"),
1435        );
1436        let expected = "\
1437---
1438type: meeting
1439created: 2026-05-27T08:00:00-07:00
1440updated: 2026-05-27T08:00:00-07:00
1441summary: Notes
1442---
1443Real link: [[records/contacts/sc2]].
1444
1445```
1446Example: [[records/contacts/sarah-chen]]
1447```
1448";
1449        assert_eq!(
1450            got, expected,
1451            "a link inside a body fence must survive rename; only the live edge retargets"
1452        );
1453    }
1454
1455    // ── forwardlinks ─────────────────────────────────────────────────────────
1456
1457    #[test]
1458    fn forwardlinks_returns_sorted_deduped_targets_excluding_self() {
1459        let fx = Fixture::new();
1460        fx.write( // body links: `sarah` twice, `acme` once, and the page itself
1461            "records/projects/renewal.md",
1462            "synthesis",
1463            "Renewal project",
1464            "Links: [[records/contacts/sarah]] [[records/companies/acme]] [[records/contacts/sarah]] and itself [[records/projects/renewal]].",
1465        );
1466        // No target file is written here: forwardlinks reads only the seed file,
1467        // so targets need not exist. Self-links drop; duplicates collapse; sorted asc.
1468        let got = forwardlinks(&fx.store, &fx.p("records/projects/renewal.md")).unwrap();
1469        assert_eq!(
1470            paths(&got),
1471            vec!["records/companies/acme", "records/contacts/sarah"]
1472        );
1473    }
1474
1475    #[test]
1476    fn forwardlinks_picks_up_wiki_links_in_frontmatter() {
1477        // SPEC: wiki-links appear in scalar + block-sequence frontmatter fields,
1478        // not just the body. forwardlinks must follow those edges too.
1479        let fx = Fixture::new();
1480        fx.write_raw( // scalar `company:`, block-sequence `attendees:`, plus one body link
1481            "records/meetings/m1.md",
1482            "---\ntype: meeting\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Renewal sync\ncompany: [[records/companies/acme]]\nattendees:\n  - [[records/contacts/sarah]]\n  - [[records/contacts/elena]]\n---\nNotes about [[records/projects/renewal]].\n",
1483        );
1484        let got = forwardlinks(&fx.store, &fx.p("records/meetings/m1.md")).unwrap();
1485        assert_eq!(
1486            paths(&got),
1487            vec![
1488                "records/companies/acme", // from the scalar `company:` field
1489                "records/contacts/elena", // from the `attendees:` sequence
1490                "records/contacts/sarah", // from the `attendees:` sequence
1491                "records/projects/renewal", // from the body
1492            ]
1493        );
1494    }
1495
1496    #[test]
1497    fn forwardlinks_missing_file_is_empty_not_error() {
1498        let fx = Fixture::new(); // this test never writes `ghost.md`
1499        let got = forwardlinks(&fx.store, &fx.p("records/profiles/ghost.md")).unwrap();
1500        assert!(got.is_empty()); // `unwrap` above proves Ok; a missing seed yields no edges
1501    }
1502
1503    #[test]
1504    fn forwardlinks_resolves_seed_given_without_md_extension() {
1505        let fx = Fixture::new();
1506        fx.write( // the seed is written with its `.md` extension
1507            "records/profiles/sarah.md",
1508            "profile",
1509            "Sarah bio",
1510            "Works at [[records/companies/acme]].",
1511        );
1512        // Seed passed in bare wiki-link form (no `.md`) must still resolve to that file.
1513        let got = forwardlinks(&fx.store, &fx.p("records/profiles/sarah")).unwrap();
1514        assert_eq!(paths(&got), vec!["records/companies/acme"]);
1515    }
1516
1517    // ── backlinks ──────────────────────────────────────────────────────────
1518
1519    #[test]
1520    fn backlinks_finds_incoming_across_layers_and_link_forms() {
1521        let fx = Fixture::new();
1522        // Target (empty body, so it links to nothing itself).
1523        fx.write("records/contacts/sarah.md", "contact", "Sarah Chen", "");
1524        // Three linkers, one per spelling, all pointing at the same target.
1525        fx.write( // spelling 1: bare `[[x]]`
1526            "records/profiles/sarah.md",
1527            "profile",
1528            "bio",
1529            "See [[records/contacts/sarah]].",
1530        );
1531        fx.write( // spelling 2: with display text, `[[x|d]]`
1532            "records/meetings/m1.md",
1533            "meeting",
1534            "Renewal call",
1535            "Attendee [[records/contacts/sarah|Sarah]].",
1536        );
1537        fx.write( // spelling 3: with extension, `[[x.md]]` — and from the sources layer
1538            "sources/emails/e1.md",
1539            "email",
1540            "Hi",
1541            "From [[records/contacts/sarah.md]] today.",
1542        );
1543        // A file that links to a DIFFERENT contact must not be a backlink.
1544        fx.write(
1545            "records/profiles/other.md",
1546            "profile",
1547            "x",
1548            "[[records/contacts/sarah-2]]",
1549        );
1550        fx.reindex();
1551
1552        // All three link forms ([[x]], [[x|d]], [[x.md]]) resolve to the same
1553        // target and are found; the linkers are returned in canonical bare form.
1554        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1555        assert_eq!(
1556            paths(&got),
1557            vec![
1558                "records/meetings/m1",
1559                "records/profiles/sarah",
1560                "sources/emails/e1",
1561            ]
1562        );
1563    }
1564
1565    #[test]
1566    fn backlinks_and_forwardlinks_round_trip_on_same_key() {
1567        // If A forwardlinks to B, then B backlinks to A — both expressed in the
1568        // identical bare key, so neighborhood can dedup across directions.
1569        let fx = Fixture::new();
1570        fx.write( // A: its body holds the single A → B link
1571            "records/profiles/a.md",
1572            "profile",
1573            "A",
1574            "Knows [[records/profiles/b]].",
1575        );
1576        fx.write("records/profiles/b.md", "profile", "B", ""); // B: empty body, no links out
1577        fx.reindex();
1578        let fwd = forwardlinks(&fx.store, &fx.p("records/profiles/a.md")).unwrap();
1579        let back = backlinks(&fx.store, &fx.p("records/profiles/b.md")).unwrap();
1580        assert_eq!(paths(&fwd), vec!["records/profiles/b"]); // A → B, bare key
1581        assert_eq!(paths(&back), vec!["records/profiles/a"]); // B ← A, same bare-key form
1582    }
1583
1584    #[test]
1585    fn backlinks_does_not_match_path_prefix_collisions() {
1586        let fx = Fixture::new();
1587        fx.write("records/contacts/sam.md", "contact", "Sam", ""); // the target
1588        // `sam-smith` shares the `sam` prefix; must NOT count as a backlink to `sam`.
1589        fx.write(
1590            "records/profiles/x.md",
1591            "profile",
1592            "x",
1593            "[[records/contacts/sam-smith]]",
1594        );
1595        // The genuine backlink: links to exactly `sam`.
1596        fx.write(
1597            "records/profiles/y.md",
1598            "profile",
1599            "y",
1600            "[[records/contacts/sam]]",
1601        );
1602        fx.reindex();
1603
1604        let got = backlinks(&fx.store, &fx.p("records/contacts/sam")).unwrap(); // bare seed, no `.md`
1605        assert_eq!(paths(&got), vec!["records/profiles/y"]);
1606    }
1607
1608    #[test]
1609    fn backlinks_excludes_self_reference() {
1610        let fx = Fixture::new();
1611        // A page that links to itself is not its own backlink.
1612        fx.write( // the only page this test writes; its body links back to itself
1613            "records/synthesis/overview.md",
1614            "synthesis",
1615            "Overview",
1616            "This page [[records/synthesis/overview]] references itself.",
1617        );
1618        fx.reindex();
1619        let got = backlinks(&fx.store, &fx.p("records/synthesis/overview.md")).unwrap();
1620        assert!(
1621            got.is_empty(),
1622            "self-link must not appear as a backlink, got {got:?}"
1623        );
1624    }
1625
1626    #[test]
1627    fn backlinks_empty_when_nobody_links() {
1628        let fx = Fixture::new();
1629        fx.write("records/contacts/lonely.md", "contact", "Lonely", ""); // target; nothing links to it
1630        fx.write( // the only other page links elsewhere (`acme`), not to `lonely`
1631            "records/profiles/unrelated.md",
1632            "profile",
1633            "x",
1634            "[[records/companies/acme]]",
1635        );
1636        fx.reindex();
1637        let got = backlinks(&fx.store, &fx.p("records/contacts/lonely.md")).unwrap();
1638        assert!(got.is_empty()); // an unlinked target yields Ok(empty), not an error
1639    }
1640
1641    #[test]
1642    fn backlinks_ignores_index_and_meta_files() {
1643        let fx = Fixture::new();
1644        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1645        // An index.md that lists the target must NOT be reported as a backlink
1646        // (indexes are catalog, not relationship edges). Only `index.md` is covered here.
1647        fx.write_raw(
1648            "records/contacts/index.md",
1649            "---\ntype: index\nscope: folder\nfolder: records/contacts\n---\n- [[records/contacts/sarah]] — Sarah\n",
1650        );
1651        fx.reindex();
1652        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1653        assert!(got.is_empty(), "index.md must be excluded, got {got:?}");
1654    }
1655
1656    #[test]
1657    fn backlinks_finds_body_only_edge_not_in_frontmatter_links_field() {
1658        // REGRESSION: the sidecar's `links` field carries only the file's
1659        // frontmatter `links:` list; it does NOT include wiki-links written in
1660        // the body or in other typed frontmatter fields. Answering backlinks
1661        // from `links[]` alone would silently miss this edge. The call below is
1662        // the unscoped `backlinks`, which (per the module docs) finds linkers by
1663        // scanning file text rather than reading `links[]`, so a body-only link
1664        // must still register as a backlink.
1665        let fx = Fixture::new();
1666        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1667        // `standup.md` links to sarah ONLY in its body — its frontmatter has no
1668        // `links:` field at all, so the sidecar record's `links` is empty.
1669        fx.write(
1670            "records/meetings/standup.md",
1671            "meeting",
1672            "Standup",
1673            "Discussed renewal with [[records/contacts/sarah]].",
1674        );
1675        fx.reindex();
1676
1677        // Guard the premise: the sidecar record really does carry an empty
1678        // `links` (so this test fails loudly if the index ever starts extracting
1679        // body links — at which point the backlink predicate could be revisited).
1680        let rec = fx
1681            .store
1682            .find_by_type("meeting")
1683            .unwrap()
1684            .into_iter()
1685            .find(|r| r.path == fx.p("records/meetings/standup.md"))
1686            .expect("meeting is catalogued in its sidecar");
1687        assert!(
1688            rec.links.is_empty(),
1689            "premise: the body link is NOT projected into the sidecar `links` field; got {:?}",
1690            rec.links
1691        );
1692
1693        // Yet backlinks still finds it — because the edge is read from the file's
1694        // text, not from the sidecar `links` field.
1695        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1696        assert_eq!(
1697            paths(&got),
1698            vec!["records/meetings/standup"],
1699            "a body-only wiki-link must register as a backlink"
1700        );
1701    }
1702
1703    #[test]
1704    fn backlinks_finds_edge_in_typed_frontmatter_field() {
1705        // A wiki-link inside a *typed* frontmatter field (`company:`) is a real
1706        // edge forwardlinks follows, so backlinks must find it too — even though
1707        // the sidecar's `links` field (the `links:` key only) does not list it.
1708        let fx = Fixture::new();
1709        fx.write("records/companies/acme.md", "company", "Acme", ""); // the target here is `acme`
1710        fx.write_raw( // the linker is `sarah`: `company:` holds its only link, the body has none
1711            "records/contacts/sarah.md",
1712            "---\ntype: contact\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Sarah\ncompany: [[records/companies/acme]]\n---\nBody with no links.\n",
1713        );
1714        fx.reindex();
1715        let got = backlinks(&fx.store, &fx.p("records/companies/acme.md")).unwrap();
1716        assert_eq!(
1717            paths(&got),
1718            vec!["records/contacts/sarah"],
1719            "a wiki-link in a typed frontmatter field is an incoming edge"
1720        );
1721    }
1722
1723    #[test]
1724    fn backlinks_unscoped_scans_the_tree_not_only_the_sidecar() {
1725        // REGRESSION (loop budget): an UNSCOPED `backlinks` must resolve incoming
1726        // edges with a SINGLE embedded-ripgrep pass over the tree
1727        // (`Store::find_links_to`), NOT by reading the sidecar candidate set and
1728        // then `read_to_string`-confirming each candidate (which re-opens every
1729        // content file → O(store); the documented >3x budget miss). That ripgrep
1730        // pass is the same scan engine `validate`/`rename` ride, and the tree —
1731        // not the sidecar — is its ground truth: a linker that is on disk but
1732        // absent from every sidecar (stale / never-built index) is still found.
1733        // We assert exactly that, behaviorally: the test fails loudly if the
1734        // unscoped path ever reverts to the sidecar-bounded per-candidate confirm
1735        // loop (that loop would NOT find the unindexed linker).
1736        let fx = Fixture::new();
1737        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1738        fx.write(
1739            "records/profiles/indexed.md",
1740            "profile",
1741            "Indexed",
1742            "[[records/contacts/sarah]]",
1743        );
1744        fx.reindex(); // builds sidecars for sarah + the indexed linker
1745
1746        // Now drop a NEW linker on disk WITHOUT reindexing — it is on disk but in
1747        // no sidecar.
1748        fx.write(
1749            "records/profiles/unindexed.md",
1750            "profile",
1751            "Unindexed",
1752            "[[records/contacts/sarah]]",
1753        );
1754
1755        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1756        assert_eq!(
1757            paths(&got),
1758            vec!["records/profiles/indexed", "records/profiles/unindexed"],
1759            "unscoped backlinks ripgrep-scans the tree, so the on-disk-but-unindexed \
1760             linker is found too — not only the sidecar-catalogued one"
1761        );
1762    }
1763
1764    #[test]
1765    fn backlinks_scoped_candidates_come_from_the_sidecar_not_a_tree_walk() {
1766        // REGRESSION (scale contract): the SCOPED form (`--type` / `--in`) is the
1767        // I/O-scoped path — it enumerates candidates from the `index.jsonl`
1768        // sidecars and parses only those files, NOT a whole-tree walk.
1769        // That is what makes the scope an I/O scope, not just a result filter:
1770        // a linker that is on disk but ABSENT from the sidecar (stale / never-built
1771        // index) is NOT discovered by the scoped call (the sidecar bounds which
1772        // files are candidates). This is the loop-vs-walk distinction the SPEC draws;
1773        // same fixture as `backlinks_unscoped_scans_the_tree_not_only_the_sidecar`, inverse result.
1774        let fx = Fixture::new();
1775        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1776        fx.write(
1777            "records/profiles/indexed.md",
1778            "profile",
1779            "Indexed",
1780            "[[records/contacts/sarah]]",
1781        );
1782        fx.reindex(); // builds sidecars for sarah + the indexed linker
1783
1784        // Drop a NEW profile linker on disk WITHOUT reindexing — on disk, in no
1785        // sidecar.
1786        fx.write(
1787            "records/profiles/unindexed.md",
1788            "profile",
1789            "Unindexed",
1790            "[[records/contacts/sarah]]",
1791        );
1792
1793        // Scoped to the `profile` type: the candidate set is the sidecar's, so
1794        // only the catalogued linker is found — the unindexed one is invisible.
1795        let only_profiles = vec!["profile".to_string()];
1796        let got = backlinks_filtered(
1797            &fx.store,
1798            &fx.p("records/contacts/sarah.md"),
1799            &only_profiles,
1800            None, // no `--in` layer scope
1801        )
1802        .unwrap();
1803        assert_eq!(
1804            paths(&got),
1805            vec!["records/profiles/indexed"],
1806            "scoped backlinks reads the sidecar candidate set; the on-disk-but-unindexed \
1807             linker is not tree-walked"
1808        );
1809    }
1810
1811    #[test]
1812    fn backlinks_filtered_type_scopes_the_candidate_set() {
1813        // `--type` narrows backlinks to linkers of that type. Two files link to
1814        // the target — one `meeting`, one `profile`; filtering to `meeting`
1815        // returns only the meeting.
1816        let fx = Fixture::new();
1817        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1818        fx.write( // linker of type `meeting`
1819            "records/meetings/m1.md",
1820            "meeting",
1821            "Call",
1822            "[[records/contacts/sarah]]",
1823        );
1824        fx.write( // linker of type `profile`
1825            "records/profiles/bio.md",
1826            "profile",
1827            "Bio",
1828            "[[records/contacts/sarah]]",
1829        );
1830        fx.reindex();
1831
1832        let only_meetings = vec!["meeting".to_string()];
1833        let got = backlinks_filtered(
1834            &fx.store,
1835            &fx.p("records/contacts/sarah.md"),
1836            &only_meetings,
1837            None, // no `--in` layer scope
1838        )
1839        .unwrap();
1840        assert_eq!(
1841            paths(&got),
1842            vec!["records/meetings/m1"],
1843            "--type meeting must exclude the profile linker"
1844        );
1845
1846        // Unfiltered, both come back — proving the filter (not the data) dropped one.
1847        let all = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1848        assert_eq!(
1849            paths(&all),
1850            vec!["records/meetings/m1", "records/profiles/bio"]
1851        );
1852    }
1853
1854    #[test]
1855    fn backlinks_filtered_layer_scopes_the_candidate_set() {
1856        // `--in <layer>` narrows backlinks to linkers under that layer. The two
1857        // linkers live in different layers (a sources email and a records
1858        // meeting) so the scope genuinely separates them.
1859        let fx = Fixture::new();
1860        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1861        fx.write( // linker in the records layer
1862            "records/meetings/m1.md",
1863            "meeting",
1864            "Call",
1865            "[[records/contacts/sarah]]",
1866        );
1867        fx.write( // linker in the sources layer
1868            "sources/emails/intro.md",
1869            "email",
1870            "Intro",
1871            "[[records/contacts/sarah]]",
1872        );
1873        fx.reindex();
1874
1875        let got = backlinks_filtered(
1876            &fx.store,
1877            &fx.p("records/contacts/sarah.md"),
1878            &[], // no `--type` filter
1879            Some(Layer::Sources),
1880        )
1881        .unwrap();
1882        assert_eq!(
1883            paths(&got),
1884            vec!["sources/emails/intro"],
1885            "--in sources must keep only the sources-layer linker"
1886        );
1887
1888        let records_only = backlinks_filtered(
1889            &fx.store,
1890            &fx.p("records/contacts/sarah.md"),
1891            &[], // no `--type` filter
1892            Some(Layer::Records),
1893        )
1894        .unwrap();
1895        assert_eq!(paths(&records_only), vec!["records/meetings/m1"]); // mirror case: `--in records`
1896    }
1897
1898    #[test]
1899    fn backlinks_scoped_type_spans_all_topic_folders_in_its_layer() {
1900        // REGRESSION (finding #12): a `type` can legitimately span several folders
1901        // within one layer — a `profile` is filed under its canonical
1902        // `records/profiles/` folder, but an agent may also file a profile under
1903        // another `records/<folder>/` (the type, not the folder, is authoritative).
1904        // The scoped candidate set must read the whole `records/` layer and filter
1905        // by type, NOT just the canonical-guess folder `records/profiles/`. Before
1906        // the fix, `find_by_type("profile")` read ONLY `records/profiles/index.jsonl`
1907        // whenever that sidecar existed, silently dropping every profile linker
1908        // filed under any other folder — so `backlinks --type profile` under-reported
1909        // dependents (a wrong blast-radius check) the moment a `records/profiles/`
1910        // page also existed.
1911        //
1912        // The trigger needs BOTH: a populated `records/profiles/` (so its canonical
1913        // sidecar exists) AND a profile elsewhere in the layer that links the
1914        // target. The earlier
1915        // `backlinks_scoped_candidates_come_from_the_sidecar_not_a_tree_walk` test
1916        // masks this bug because its profile linkers all sit IN `records/profiles/`.
1917        let fx = Fixture::new();
1918        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1919        // A profile in the CANONICAL type folder, NOT linking the target — its
1920        // only purpose is to make `records/profiles/index.jsonl` exist on disk.
1921        fx.write(
1922            "records/profiles/glossary.md",
1923            "profile",
1924            "Glossary",
1925            "No link to sarah here.",
1926        );
1927        // A profile in a NON-canonical folder that DOES link the target.
1928        fx.write(
1929            "records/people/sarah.md",
1930            "profile",
1931            "Sarah bio",
1932            "Profile of [[records/contacts/sarah]].",
1933        );
1934        fx.reindex(); // builds records/profiles/index.jsonl AND records/people/index.jsonl
1935
1936        // Scoped to `profile`: the off-canonical linker MUST be found. Pre-fix,
1937        // the candidate set was only `records/profiles/`'s sidecar, so this was empty.
1938        let scoped = backlinks_filtered(
1939            &fx.store,
1940            &fx.p("records/contacts/sarah.md"),
1941            &["profile".to_string()],
1942            None,
1943        )
1944        .unwrap();
1945        assert_eq!(
1946            paths(&scoped),
1947            vec!["records/people/sarah"],
1948            "a profile filed outside records/profiles/ must still be a scoped backlink"
1949        );
1950
1951        // Cross-check: the unscoped path (ripgrep tree scan) finds the same single
1952        // linker, proving the scoped result is now complete — not over- or
1953        // under-counting — and that the data was real all along.
1954        let unscoped = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1955        assert_eq!(
1956            paths(&unscoped),
1957            vec!["records/people/sarah"],
1958            "scoped and unscoped backlinks must agree on the edge set"
1959        );
1960    }
1961
1962    #[test]
1963    fn backlinks_scoped_type_finds_loose_file_at_non_canonical_layer() {
1964        // REGRESSION (spec-conformance, SPEC § Loose files): a loose file (content
1965        // directly at a layer root, no type-folder) may be filed at a layer that is
1966        // NOT the type's canonical layer — e.g. a `note` (canonical layer
1967        // `sources/`) filed as `records/loose-note.md` and catalogued in
1968        // `records/index.jsonl`. A scoped `backlinks --type note` must still find
1969        // it, matching the unscoped scan and `dbmd query --type note`.
1970        //
1971        // Pre-fix, `candidate_records(--type note)` read only `layer_for_type(note)`
1972        // = Sources, so the records-loose note was invisible (`--type note` empty),
1973        // and `--type note --in records` hit the early `continue` (records ≠ the
1974        // note's canonical Sources layer) → also empty. Both diverged from the
1975        // store-wide unscoped scan. The fix reads store-wide (or the named layer)
1976        // sidecars and filters by `type`, never short-circuiting on the canonical
1977        // layer.
1978        let fx = Fixture::new();
1979        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1980        // A loose `note` directly at the records/ layer root (no type-folder),
1981        // linking the target. Its canonical layer is sources/, so this exercises
1982        // exactly the off-canonical-layer loose-file path.
1983        fx.write_raw(
1984            "records/loose-note.md",
1985            "---\ntype: note\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Loose\n---\nMentions [[records/contacts/sarah]].\n",
1986        );
1987        fx.reindex(); // catalogs the loose note in records/index.jsonl
1988
1989        let target = fx.p("records/contacts/sarah.md");
1990        let note_type = vec!["note".to_string()];
1991
1992        // Unscoped: the loose note is a backlink (ground truth).
1993        let unscoped = backlinks(&fx.store, &target).unwrap();
1994        assert_eq!(
1995            paths(&unscoped),
1996            vec!["records/loose-note"],
1997            "unscoped backlinks finds the records-loose note"
1998        );
1999
2000        // `--type note` (no layer): must agree with unscoped, NOT empty.
2001        let by_type = backlinks_filtered(&fx.store, &target, &note_type, None).unwrap();
2002        assert_eq!(
2003            paths(&by_type),
2004            vec!["records/loose-note"],
2005            "`--type note` must find the loose note filed at the non-canonical (records) layer"
2006        );
2007
2008        // `--type note --in records`: the note lives in records/, so this must
2009        // find it too — the early `continue` on canonical-layer mismatch is gone.
2010        let by_type_in_records =
2011            backlinks_filtered(&fx.store, &target, &note_type, Some(Layer::Records)).unwrap();
2012        assert_eq!(
2013            paths(&by_type_in_records),
2014            vec!["records/loose-note"],
2015            "`--type note --in records` must find the records-loose note"
2016        );
2017
2018        // Cross-check the same completeness via the structured query path the SPEC
2019        // ties graph reads to: `query --type note` (store-wide) sees the loose note,
2020        // proving the data was real and the scoped graph result now agrees with it.
2021        let q_records: Vec<String> = paths(
2022            &crate::query::Query::new()
2023                .with_type("note")
2024                .execute(&fx.store)
2025                .unwrap()
2026                .into_iter()
2027                .map(|r| r.path)
2028                .collect::<Vec<_>>(),
2029        );
2030        assert_eq!(
2031            q_records,
2032            vec!["records/loose-note.md"], // query rows keep the `.md` file path, unlike the bare graph keys above
2033            "query --type note sees the loose note store-wide; scoped backlinks must agree"
2034        );
2035    }
2036
2037    // ── neighborhood ─────────────────────────────────────────────────────────
2038
2039    #[test]
2040    fn neighborhood_hops_zero_is_empty() {
2041        let fx = Fixture::new();
2042        fx.write(
2043            "records/profiles/a.md",
2044            "profile",
2045            "A",
2046            "[[records/profiles/b]]",
2047        );
2048        fx.write("records/profiles/b.md", "profile", "B", "");
2049        let slice = neighborhood(
2050            &fx.store,
2051            &fx.p("records/profiles/a.md"),
2052            0,
2053            &[],
2054            Direction::Both,
2055        )
2056        .unwrap();
2057        assert_eq!(slice.seed, fx.p("records/profiles/a"));
2058        assert!(slice.nodes.is_empty());
2059    }
2060
2061    #[test]
2062    fn neighborhood_outgoing_one_hop_reads_summary_and_type() {
2063        let fx = Fixture::new();
2064        fx.write(
2065            "records/profiles/a.md",
2066            "profile",
2067            "Person A",
2068            "Knows [[records/contacts/b]].",
2069        );
2070        fx.write("records/contacts/b.md", "contact", "Contact B summary", "");
2071        let slice = neighborhood(
2072            &fx.store,
2073            &fx.p("records/profiles/a.md"),
2074            1,
2075            &[],
2076            Direction::Outgoing,
2077        )
2078        .unwrap();
2079        assert_eq!(slice.nodes.len(), 1);
2080        let n = &slice.nodes[0];
2081        assert_eq!(n.path, fx.p("records/contacts/b"));
2082        assert_eq!(n.summary, "Contact B summary");
2083        assert_eq!(n.type_.as_deref(), Some("contact"));
2084        assert_eq!(n.hops, 1);
2085        assert_eq!(
2086            n.via,
2087            Some((fx.p("records/profiles/a"), Direction::Outgoing))
2088        );
2089    }
2090
2091    #[test]
2092    fn neighborhood_incoming_only_walks_backlinks() {
2093        let fx = Fixture::new();
2094        // a -> seed (incoming to seed). seed -> c (outgoing from seed).
2095        fx.write(
2096            "records/profiles/seed.md",
2097            "profile",
2098            "Seed",
2099            "Out to [[records/profiles/c]].",
2100        );
2101        fx.write(
2102            "records/profiles/a.md",
2103            "profile",
2104            "A",
2105            "In to [[records/profiles/seed]].",
2106        );
2107        fx.write("records/profiles/c.md", "profile", "C", "");
2108        fx.reindex();
2109        let slice = neighborhood(
2110            &fx.store,
2111            &fx.p("records/profiles/seed.md"),
2112            1,
2113            &[],
2114            Direction::Incoming,
2115        )
2116        .unwrap();
2117        // Incoming direction: only `a` (which links TO seed), not `c`.
2118        assert_eq!(
2119            paths(
2120                &slice
2121                    .nodes
2122                    .iter()
2123                    .map(|n| n.path.clone())
2124                    .collect::<Vec<_>>()
2125            ),
2126            vec!["records/profiles/a"]
2127        );
2128        assert_eq!(
2129            slice.nodes[0].via,
2130            Some((fx.p("records/profiles/seed"), Direction::Incoming))
2131        );
2132    }
2133
2134    #[test]
2135    fn neighborhood_bounded_bfs_respects_hop_limit_and_min_distance() {
2136        let fx = Fixture::new();
2137        // Chain a -> b -> c -> d, all outgoing.
2138        fx.write("records/c/a.md", "concept", "A", "[[records/c/b]]");
2139        fx.write("records/c/b.md", "concept", "B", "[[records/c/c]]");
2140        fx.write("records/c/c.md", "concept", "C", "[[records/c/d]]");
2141        fx.write("records/c/d.md", "concept", "D", "");
2142        let slice = neighborhood(
2143            &fx.store,
2144            &fx.p("records/c/a.md"),
2145            2,
2146            &[],
2147            Direction::Outgoing,
2148        )
2149        .unwrap();
2150        // 2 hops reaches b (1) and c (2), not d (3).
2151        let by_path: HashMap<String, u32> = slice
2152            .nodes
2153            .iter()
2154            .map(|n| (n.path.to_string_lossy().to_string(), n.hops))
2155            .collect();
2156        assert_eq!(by_path.get("records/c/b").copied(), Some(1));
2157        assert_eq!(by_path.get("records/c/c").copied(), Some(2));
2158        assert_eq!(by_path.get("records/c/d"), None);
2159        assert_eq!(slice.nodes.len(), 2);
2160    }
2161
2162    #[test]
2163    fn neighborhood_records_min_hops_on_diamond() {
2164        let fx = Fixture::new();
2165        // Diamond: a -> b, a -> c, b -> d, c -> d. d is reachable at hop 2 from
2166        // either branch; it must be recorded once, at hop 2.
2167        fx.write(
2168            "records/d/a.md",
2169            "concept",
2170            "A",
2171            "[[records/d/b]] [[records/d/c]]",
2172        );
2173        fx.write("records/d/b.md", "concept", "B", "[[records/d/d]]");
2174        fx.write("records/d/c.md", "concept", "C", "[[records/d/d]]");
2175        fx.write("records/d/d.md", "concept", "D", "");
2176        let slice = neighborhood(
2177            &fx.store,
2178            &fx.p("records/d/a.md"),
2179            3,
2180            &[],
2181            Direction::Outgoing,
2182        )
2183        .unwrap();
2184        let d_nodes: Vec<&ContextNode> = slice
2185            .nodes
2186            .iter()
2187            .filter(|n| n.path == fx.p("records/d/d"))
2188            .collect();
2189        assert_eq!(d_nodes.len(), 1, "d must appear exactly once");
2190        assert_eq!(d_nodes[0].hops, 2, "d's min distance from a is 2");
2191        // b and c at hop 1, d at hop 2 => 3 nodes total, no cycle blowup.
2192        assert_eq!(slice.nodes.len(), 3);
2193    }
2194
2195    #[test]
2196    fn neighborhood_type_filter_narrows_results_but_not_traversal() {
2197        let fx = Fixture::new();
2198        // seed -> contact -> meeting. Filtering to `meeting` must still reach
2199        // the meeting THROUGH the (excluded) contact at hop 2.
2200        fx.write(
2201            "records/profiles/seed.md",
2202            "profile",
2203            "Seed",
2204            "[[records/contacts/sarah]]",
2205        );
2206        fx.write(
2207            "records/contacts/sarah.md",
2208            "contact",
2209            "Sarah",
2210            "[[records/meetings/m1]]",
2211        );
2212        fx.write("records/meetings/m1.md", "meeting", "Renewal call", "");
2213        let only_meetings = vec!["meeting".to_string()];
2214        let slice = neighborhood(
2215            &fx.store,
2216            &fx.p("records/profiles/seed.md"),
2217            2,
2218            &only_meetings,
2219            Direction::Outgoing,
2220        )
2221        .unwrap();
2222        // Only the meeting is returned; the contact is traversed but filtered out.
2223        assert_eq!(slice.nodes.len(), 1);
2224        assert_eq!(slice.nodes[0].path, fx.p("records/meetings/m1"));
2225        assert_eq!(slice.nodes[0].type_.as_deref(), Some("meeting"));
2226        assert_eq!(slice.nodes[0].hops, 2);
2227    }
2228
2229    #[test]
2230    fn neighborhood_capped_bounds_traversal_not_just_output() {
2231        // REGRESSION (finding #16): `neighborhood` expands every reached node, and
2232        // each incoming-edge expansion is a full-store scan, so the per-node cost
2233        // is O(visited × store). The CLI's `--limit` was applied post-hoc as a
2234        // `.take(n)` on the RESULT, which caps printed nodes but NOT the traversal
2235        // — the scans still fire for every reachable node. `neighborhood_capped`
2236        // bounds the traversal itself: once `max_nodes` distinct nodes are
2237        // admitted, the BFS stops discovering (and therefore stops scanning).
2238        //
2239        // Structure proving traversal — not just output — is bounded:
2240        //   seed -> a, b, c   (hop 1, discovered in sorted order: a, b, c)
2241        //   a    -> deep      (hop 2, reachable ONLY by expanding `a`)
2242        // Cap at 2: admit `a` and `b`, stop before `c` and before any hop-2
2243        // expansion. `deep` is therefore unreachable. A post-hoc `.take(2)` would
2244        // have traversed the whole graph (reaching `deep`) and only then truncated
2245        // — so the absence of `deep` is observable proof the traversal stopped.
2246        let fx = Fixture::new();
2247        fx.write(
2248            "records/n/seed.md",
2249            "concept",
2250            "Seed",
2251            "[[records/n/a]] [[records/n/b]] [[records/n/c]]",
2252        );
2253        fx.write("records/n/a.md", "concept", "A", "[[records/n/deep]]");
2254        fx.write("records/n/b.md", "concept", "B", "");
2255        fx.write("records/n/c.md", "concept", "C", "");
2256        fx.write("records/n/deep.md", "concept", "Deep", "");
2257
2258        // Uncapped over 3 hops: all four reachable nodes appear (a, b, c at hop 1,
2259        // deep at hop 2) — the full set the cap is measured against.
2260        let full = neighborhood(
2261            &fx.store,
2262            &fx.p("records/n/seed.md"),
2263            3,
2264            &[],
2265            Direction::Outgoing,
2266        )
2267        .unwrap();
2268        assert_eq!(
2269            paths(
2270                &full
2271                    .nodes
2272                    .iter()
2273                    .map(|n| n.path.clone())
2274                    .collect::<Vec<_>>()
2275            ),
2276            vec![
2277                "records/n/a",
2278                "records/n/b",
2279                "records/n/c",
2280                "records/n/deep"
2281            ],
2282            "uncapped traversal reaches every node within the hop budget"
2283        );
2284
2285        // Capped at 2 over the SAME hop budget: exactly the first two hop-1 nodes,
2286        // and crucially NOT `deep` — the cap halted the BFS before any node was
2287        // expanded into hop 2, so the deep node was never traversed to.
2288        let capped = neighborhood_capped(
2289            &fx.store,
2290            &fx.p("records/n/seed.md"),
2291            3,
2292            &[],
2293            Direction::Outgoing,
2294            Some(2),
2295        )
2296        .unwrap();
2297        assert_eq!(
2298            paths(
2299                &capped
2300                    .nodes
2301                    .iter()
2302                    .map(|n| n.path.clone())
2303                    .collect::<Vec<_>>()
2304            ),
2305            vec!["records/n/a", "records/n/b"],
2306            "the cap bounds traversal: only the first 2 nodes are reached, and the \
2307             hop-2 `deep` node (reachable only by expanding a capped-out node) is \
2308             never traversed"
2309        );
2310
2311        // `max_nodes = None` is exactly the unbounded `neighborhood` behavior.
2312        let uncapped = neighborhood_capped(
2313            &fx.store,
2314            &fx.p("records/n/seed.md"),
2315            3,
2316            &[],
2317            Direction::Outgoing,
2318            None,
2319        )
2320        .unwrap();
2321        assert_eq!(
2322            uncapped.nodes.len(),
2323            full.nodes.len(),
2324            "None cap matches the unbounded neighborhood result"
2325        );
2326    }
2327
2328    #[test]
2329    fn neighborhood_capped_both_direction_caps_the_node_count() {
2330        // The CLI always passes `Direction::Both` (the per-node backlinks scan is
2331        // the expensive path the cap exists to bound). The cap gates discovery in
2332        // any direction, so a hub linked from many nodes is still bounded.
2333        let fx = Fixture::new();
2334        fx.write("records/profiles/hub.md", "profile", "Hub", "");
2335        for n in ["a", "b", "c", "d", "e"] {
2336            fx.write(
2337                &format!("records/profiles/{n}.md"),
2338                "profile",
2339                n,
2340                "[[records/profiles/hub]]",
2341            );
2342        }
2343        fx.reindex();
2344
2345        let capped = neighborhood_capped(
2346            &fx.store,
2347            &fx.p("records/profiles/hub.md"),
2348            1,
2349            &[],
2350            Direction::Both,
2351            Some(3),
2352        )
2353        .unwrap();
2354        assert_eq!(
2355            capped.nodes.len(),
2356            3,
2357            "Both-direction neighborhood is bounded to the node cap"
2358        );
2359
2360        // Without the cap the same call returns all five backlinking nodes,
2361        // proving the cap (not the data) limited the set.
2362        let uncapped = neighborhood(
2363            &fx.store,
2364            &fx.p("records/profiles/hub.md"),
2365            1,
2366            &[],
2367            Direction::Both,
2368        )
2369        .unwrap();
2370        assert_eq!(uncapped.nodes.len(), 5);
2371    }
2372
2373    #[test]
2374    fn neighborhood_cycle_terminates() {
2375        let fx = Fixture::new();
2376        // a <-> b cycle. Must not loop forever; each appears once.
2377        fx.write("records/g/a.md", "concept", "A", "[[records/g/b]]");
2378        fx.write("records/g/b.md", "concept", "B", "[[records/g/a]]");
2379        fx.reindex();
2380        let slice =
2381            neighborhood(&fx.store, &fx.p("records/g/a.md"), 10, &[], Direction::Both).unwrap();
2382        // From a: b is the only other node (a is the seed, excluded).
2383        assert_eq!(
2384            paths(
2385                &slice
2386                    .nodes
2387                    .iter()
2388                    .map(|n| n.path.clone())
2389                    .collect::<Vec<_>>()
2390            ),
2391            vec!["records/g/b"]
2392        );
2393    }
2394
2395    // ── orphans ──────────────────────────────────────────────────────────────
2396
2397    #[test]
2398    fn orphans_finds_files_with_no_edges_either_direction() {
2399        let fx = Fixture::new();
2400        // Wired pair: a links to b (a has outgoing, b has incoming).
2401        fx.write(
2402            "records/profiles/a.md",
2403            "profile",
2404            "A",
2405            "[[records/profiles/b]]",
2406        );
2407        fx.write("records/profiles/b.md", "profile", "B", "");
2408        // Orphan: no links in or out.
2409        fx.write(
2410            "sources/emails/lonely.md",
2411            "email",
2412            "Lonely email",
2413            "Just text, no links.",
2414        );
2415        let got = orphans(&fx.store, None).unwrap();
2416        assert_eq!(paths(&got), vec!["sources/emails/lonely.md"]);
2417    }
2418
2419    #[test]
2420    fn orphans_file_with_only_broken_outgoing_link_is_orphan() {
2421        let fx = Fixture::new();
2422        // Broken targets are validation issues, not graph edges to another
2423        // store file. A file whose only link points nowhere is still an orphan.
2424        fx.write(
2425            "records/profiles/a.md",
2426            "profile",
2427            "A",
2428            "[[records/contacts/ghost]]",
2429        );
2430        let got = orphans(&fx.store, None).unwrap();
2431        assert!(
2432            paths(&got).contains(&"records/profiles/a.md".to_string()),
2433            "broken outgoing links must not wire the graph: {got:?}"
2434        );
2435    }
2436
2437    #[test]
2438    fn orphans_file_with_only_incoming_is_not_orphan() {
2439        let fx = Fixture::new();
2440        // `target` has no outgoing links but IS linked to by `linker` — not an orphan.
2441        fx.write("records/contacts/target.md", "contact", "Target", "");
2442        fx.write(
2443            "records/profiles/linker.md",
2444            "profile",
2445            "Linker",
2446            "[[records/contacts/target]]",
2447        );
2448        let got = orphans(&fx.store, None).unwrap();
2449        assert!(
2450            !paths(&got).contains(&"records/contacts/target.md".to_string()),
2451            "incoming-only is not an orphan: {got:?}"
2452        );
2453        // `linker` has outgoing, so also not an orphan.
2454        assert!(!paths(&got).contains(&"records/profiles/linker.md".to_string()));
2455    }
2456
2457    #[test]
2458    fn orphans_incoming_link_from_other_layer_unorphans() {
2459        let fx = Fixture::new();
2460        // Candidate in records/, only incoming edge comes from sources/ — a
2461        // cross-layer link must still un-orphan it even when scoped to records.
2462        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
2463        fx.write(
2464            "sources/emails/sarah.md",
2465            "email",
2466            "bio",
2467            "[[records/contacts/sarah]]",
2468        );
2469        // A genuine orphan in records/ to prove the scope still returns something.
2470        fx.write("records/contacts/nemo.md", "contact", "Nemo", "");
2471        let got = orphans(&fx.store, Some(Layer::Records)).unwrap();
2472        assert_eq!(paths(&got), vec!["records/contacts/nemo.md"]);
2473    }
2474
2475    #[test]
2476    fn orphans_layer_scope_filters_candidates() {
2477        let fx = Fixture::new();
2478        // Orphans across both layers: one source, and two records (an atomic
2479        // contact + a conclusion `profile`, the former wiki-page).
2480        fx.write("sources/emails/s.md", "email", "S", "no links");
2481        fx.write("records/contacts/r.md", "contact", "R", "");
2482        fx.write("records/profiles/w.md", "profile", "W", "");
2483        // The records scope keeps only the two records-layer orphans.
2484        let only_records = orphans(&fx.store, Some(Layer::Records)).unwrap();
2485        assert_eq!(
2486            paths(&only_records),
2487            vec!["records/contacts/r.md", "records/profiles/w.md"]
2488        );
2489        let only_sources = orphans(&fx.store, Some(Layer::Sources)).unwrap();
2490        assert_eq!(paths(&only_sources), vec!["sources/emails/s.md"]);
2491        // No scope: all three, sorted (records, records, sources).
2492        let all = orphans(&fx.store, None).unwrap();
2493        assert_eq!(
2494            paths(&all),
2495            vec![
2496                "records/contacts/r.md",
2497                "records/profiles/w.md",
2498                "sources/emails/s.md",
2499            ]
2500        );
2501    }
2502
2503    #[test]
2504    fn orphans_self_link_does_not_count_as_an_edge() {
2505        let fx = Fixture::new();
2506        // A page that only links to itself has no real edges => still an orphan.
2507        fx.write(
2508            "records/synthesis/solo.md",
2509            "synthesis",
2510            "Solo",
2511            "I reference [[records/synthesis/solo]] only.",
2512        );
2513        let got = orphans(&fx.store, None).unwrap();
2514        assert_eq!(paths(&got), vec!["records/synthesis/solo.md"]);
2515    }
2516
2517    #[test]
2518    fn orphans_excludes_index_and_db_files() {
2519        let fx = Fixture::new();
2520        // A lone index.md / DB.md must never be reported as an orphan content file.
2521        fx.write_raw(
2522            "records/index.md",
2523            "---\ntype: index\nscope: layer\nfolder: records\n---\n# records\n",
2524        );
2525        fx.write(
2526            "records/profiles/real-orphan.md",
2527            "profile",
2528            "Real",
2529            "no links",
2530        );
2531        let got = orphans(&fx.store, None).unwrap();
2532        assert_eq!(paths(&got), vec!["records/profiles/real-orphan.md"]);
2533    }
2534
2535    // ── frontmatter_block helper ─────────────────────────────────────────────
2536
2537    #[test]
2538    fn frontmatter_block_extracts_between_fences() {
2539        let text = "---\ntype: contact\nsummary: hi\n---\nbody here\n";
2540        assert_eq!(
2541            frontmatter_block(text),
2542            Some("type: contact\nsummary: hi\n")
2543        );
2544    }
2545
2546    #[test]
2547    fn frontmatter_block_none_without_leading_fence() {
2548        let text = "no frontmatter here\n";
2549        assert_eq!(frontmatter_block(text), None);
2550    }
2551
2552    #[test]
2553    fn frontmatter_block_tolerates_leading_bom() {
2554        // Regression (finding #19 cross-module): a UTF-8 BOM before the opening
2555        // fence must not hide the frontmatter from the graph layer — otherwise a
2556        // BOM-prefixed file the catalog indexes contributes no backlinks/edges.
2557        // Pre-fix the `---\n` strip failed on the BOM and returned None.
2558        let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody here\n";
2559        assert_eq!(
2560            frontmatter_block(text),
2561            Some("type: contact\nsummary: hi\n"),
2562            "a leading BOM must not hide frontmatter from the graph layer"
2563        );
2564    }
2565
2566    // ── shared edge notion: whitespace / fence / case / containment ──────────
2567
2568    /// Padded `[[ x ]]` must be a forward edge AND (after reindex) a backward
2569    /// edge — the two views agreeing on the same edge in a clean store.
2570    #[test]
2571    fn padded_link_is_both_a_forward_and_backward_edge() {
2572        let fx = Fixture::new();
2573        fx.write(
2574            "records/contacts/sarah.md",
2575            "contact",
2576            "Sarah",
2577            "the contact",
2578        );
2579        fx.write(
2580            "records/profiles/a.md",
2581            "profile",
2582            "A",
2583            "See [[ records/contacts/sarah ]] today.",
2584        );
2585        fx.reindex();
2586
2587        assert_eq!(
2588            paths(&forwardlinks(&fx.store, Path::new("records/profiles/a.md")).unwrap()),
2589            vec!["records/contacts/sarah"],
2590            "padded link is a forward edge"
2591        );
2592        assert_eq!(
2593            paths(&backlinks(&fx.store, Path::new("records/contacts/sarah.md")).unwrap()),
2594            vec!["records/profiles/a"],
2595            "padded link is the SAME backward edge (forward and backward agree)"
2596        );
2597    }
2598
2599    /// A `[[...]]` only inside a fenced code block is a documentation example,
2600    /// not an edge: no forward edge, no backward edge, and the source page is an
2601    /// orphan (no real links). Matches validate's fence-aware extractor.
2602    #[test]
2603    fn fenced_link_is_not_an_edge_and_page_is_orphan() {
2604        let fx = Fixture::new();
2605        fx.write(
2606            "records/contacts/sarah.md",
2607            "contact",
2608            "Sarah",
2609            "the contact",
2610        );
2611        fx.write(
2612            "records/synthesis/howto.md",
2613            "synthesis",
2614            "Howto",
2615            "```markdown\n[[records/contacts/sarah]] is how you link.\n```",
2616        );
2617        fx.reindex();
2618
2619        assert!(
2620            forwardlinks(&fx.store, Path::new("records/synthesis/howto.md"))
2621                .unwrap()
2622                .is_empty(),
2623            "a fenced example is not a forward edge"
2624        );
2625        assert!(
2626            backlinks(&fx.store, Path::new("records/contacts/sarah.md"))
2627                .unwrap()
2628                .is_empty(),
2629            "a fenced example is not a backward edge"
2630        );
2631        let orphan_set = paths(&orphans(&fx.store, None).unwrap());
2632        assert!(
2633            orphan_set.contains(&"records/synthesis/howto.md".to_string()),
2634            "a page whose only link is fenced has no real edges => orphan: {orphan_set:?}"
2635        );
2636    }
2637
2638    /// `rename` must NOT rewrite a `[[...]]` inside a fenced code block (it is
2639    /// verbatim documentation, not an edge), while still rewriting a real link.
2640    #[test]
2641    fn rewrite_links_to_leaves_fenced_examples_untouched() {
2642        let input = "\
2643Real [[records/contacts/sarah]] link.
2644
2645```markdown
2646Example: [[records/contacts/sarah]] inside a fence.
2647```
2648
2649Trailing [[records/contacts/sarah]].
2650";
2651        let got = rewrite_links_to(
2652            input,
2653            Path::new("records/contacts/sarah"),
2654            Path::new("records/contacts/sarah-chen"),
2655        );
2656        // The two non-fenced links retarget; the fenced one is verbatim.
2657        assert!(
2658            got.contains("Real [[records/contacts/sarah-chen]] link."),
2659            "real link before the fence must retarget"
2660        );
2661        assert!(
2662            got.contains("Trailing [[records/contacts/sarah-chen]]."),
2663            "real link after the fence must retarget"
2664        );
2665        assert!(
2666            got.contains("Example: [[records/contacts/sarah]] inside a fence."),
2667            "fenced example must stay verbatim, got:\n{got}"
2668        );
2669    }
2670
2671    /// `rewrite_links_to` matches a padded link and preserves the display.
2672    #[test]
2673    fn rewrite_links_to_matches_padded_link() {
2674        let got = rewrite_links_to(
2675            "See [[ records/contacts/sarah |Sarah]] today.",
2676            Path::new("records/contacts/sarah"),
2677            Path::new("records/contacts/sarah-chen"),
2678        );
2679        assert_eq!(got, "See [[records/contacts/sarah-chen|Sarah]] today.");
2680    }
2681
2682    /// On a case-insensitive filesystem a case-variant link is the same edge:
2683    /// backlinks finds it, orphans does NOT falsely orphan the target, and
2684    /// rename rewrites it. On a case-sensitive FS the link is genuinely a
2685    /// different target, so the test is skipped.
2686    #[cfg(unix)]
2687    #[test]
2688    fn case_variant_link_is_one_edge_on_case_insensitive_fs() {
2689        // Probe the filesystem the same way the production code does
2690        // (`link_edge_key` is imported at module scope).
2691        if link_edge_key("A") != link_edge_key("a") {
2692            // case-sensitive filesystem: the case-variant link is a different
2693            // target, so this scenario doesn't apply.
2694            return;
2695        }
2696        let fx = Fixture::new();
2697        fx.write(
2698            "records/contacts/sarah-chen.md",
2699            "contact",
2700            "Sarah",
2701            "the contact",
2702        );
2703        fx.write(
2704            "records/profiles/bio.md",
2705            "profile",
2706            "Bio",
2707            "See [[records/contacts/Sarah-Chen]].",
2708        );
2709        fx.reindex();
2710
2711        assert_eq!(
2712            paths(&backlinks(&fx.store, Path::new("records/contacts/sarah-chen.md")).unwrap()),
2713            vec!["records/profiles/bio"],
2714            "case-variant incoming link must be a backward edge"
2715        );
2716        let orphan_set = paths(&orphans(&fx.store, None).unwrap());
2717        assert!(
2718            !orphan_set.contains(&"records/contacts/sarah-chen.md".to_string()),
2719            "a target with a live case-variant incoming link must NOT be orphaned: {orphan_set:?}"
2720        );
2721
2722        let rewritten = rewrite_links_to(
2723            "See [[records/contacts/Sarah-Chen]].",
2724            Path::new("records/contacts/sarah-chen"),
2725            Path::new("records/contacts/sarah"),
2726        );
2727        assert_eq!(
2728            rewritten, "See [[records/contacts/sarah]].",
2729            "rename must rewrite the case-variant link on a case-insensitive FS"
2730        );
2731    }
2732
2733    /// REGRESSION (Unicode encoding / silent graph break): a file whose name is
2734    /// written in one Unicode normalization form and an incoming link written in
2735    /// the OTHER form must be ONE edge — on macOS/APFS both name the same file
2736    /// (the FS folds NFC/NFD), so the string-keyed graph must agree. Before the
2737    /// fix, `link_edge_key` only case-folded (no NFC), so `backlinks` returned
2738    /// empty and `orphans` flagged the linked-to file as an orphan while
2739    /// `validate` saw the link as live. NFC-keying both sides unifies them.
2740    ///
2741    /// Runs on every platform: the file is written NFC and linked NFD (both
2742    /// representable in any filename), and `link_edge_key` normalizes
2743    /// unconditionally, so the assertion holds regardless of host FS folding.
2744    #[test]
2745    fn nfc_nfd_cross_normalization_link_is_one_edge() {
2746        let fx = Fixture::new();
2747        // File on disk: NFC `josé` (é = U+00E9).
2748        fx.write(
2749            "records/contacts/jos\u{00e9}.md",
2750            "contact",
2751            "Jose",
2752            "the contact",
2753        );
2754        // Incoming link: NFD `josé` (e + U+0301) — byte-different, same name.
2755        fx.write(
2756            "records/profiles/bio.md",
2757            "profile",
2758            "Bio",
2759            "Knows [[records/contacts/jose\u{0301}]].",
2760        );
2761        fx.reindex();
2762
2763        // backlinks: the NFD link must resolve to the NFC file.
2764        assert_eq!(
2765            paths(&backlinks(&fx.store, Path::new("records/contacts/jos\u{00e9}.md")).unwrap()),
2766            vec!["records/profiles/bio"],
2767            "an NFD incoming link must be a backward edge of the NFC-named file"
2768        );
2769
2770        // orphans: the linked-to file must NOT be flagged as an orphan.
2771        let orphan_set = paths(&orphans(&fx.store, None).unwrap());
2772        assert!(
2773            !orphan_set.contains(&"records/contacts/jos\u{00e9}.md".to_string()),
2774            "a target with a live cross-normalization incoming link must NOT be orphaned: \
2775             {orphan_set:?}"
2776        );
2777
2778        // forwardlinks: the body link is a real forward edge. Its emitted target
2779        // is the canonical (normalization-PRESERVING) form — i.e. the NFD bytes
2780        // as written, NOT re-normalized to NFC — because `forwardlinks` output
2781        // feeds byte-faithful rewrites; only the comparison KEY is NFC-folded.
2782        let fwd = paths(&forwardlinks(&fx.store, &fx.p("records/profiles/bio.md")).unwrap());
2783        assert_eq!(
2784            fwd,
2785            vec!["records/contacts/jose\u{0301}"],
2786            "forwardlinks must emit the body link's canonical (NFD-preserving) target"
2787        );
2788    }
2789
2790    /// A `[[../outside/x]]` escaping wiki-link is never a forward edge, and a
2791    /// `neighborhood` from the escaping page never reads or traverses through the
2792    /// external file — closing the disclosure vector.
2793    #[cfg(unix)]
2794    #[test]
2795    fn escaping_link_is_not_an_edge_and_neighborhood_does_not_escape() {
2796        let fx = Fixture::new();
2797        // An external file OUTSIDE the store root, with its own in-store link.
2798        let outside_dir = fx.store.root.parent().unwrap().join("outside");
2799        fs::create_dir_all(&outside_dir).unwrap();
2800        fs::write(
2801            outside_dir.join("secret.md"),
2802            "---\ntype: note\nsummary: TOPSECRET\n---\nLinks [[records/contacts/sarah]].\n",
2803        )
2804        .unwrap();
2805        fx.write(
2806            "records/contacts/sarah.md",
2807            "contact",
2808            "Sarah",
2809            "the contact",
2810        );
2811        fx.write(
2812            "records/concepts/traversal.md",
2813            "concept",
2814            "Traversal",
2815            "See [[../outside/secret]].",
2816        );
2817        fx.reindex();
2818
2819        // The escaping target is not a forward edge.
2820        assert!(
2821            forwardlinks(&fx.store, Path::new("records/concepts/traversal.md"))
2822                .unwrap()
2823                .is_empty(),
2824            "an escaping `[[../outside/secret]]` must not be a forward edge"
2825        );
2826
2827        // Neighborhood from the escaping page reaches nothing through the
2828        // external file (the external file is never read/traversed).
2829        let slice = neighborhood(
2830            &fx.store,
2831            Path::new("records/concepts/traversal.md"),
2832            2,
2833            &[],
2834            Direction::Outgoing,
2835        )
2836        .unwrap();
2837        assert!(
2838            slice
2839                .nodes
2840                .iter()
2841                .all(|n| !n.path.to_string_lossy().contains("outside")),
2842            "neighborhood must not read/traverse the external file: {:?}",
2843            slice.nodes
2844        );
2845    }
2846
2847    /// REGRESSION (path-safety / info-disclosure): a wiki-link target whose path
2848    /// is made entirely of `Normal` components but routes through a **symlink**
2849    /// pointing outside the store must NOT leak the out-of-store file's
2850    /// `summary`/`type` into a `neighborhood` slice. Two shapes:
2851    ///   (a) a symlinked DIRECTORY component (`records/linkdir -> /external/dir`,
2852    ///       link `[[records/linkdir/secret]]`), and
2853    ///   (b) a directly-symlinked `.md` (`records/aliased.md -> /external/secret.md`,
2854    ///       link `[[records/aliased]]`).
2855    /// Both used to slip past the all-`Normal`-components fast path in
2856    /// `resolves_within_store` (which returned `true` without canonicalizing), so
2857    /// `store.root.join(rel)` followed the in-store symlink, `is_file()` succeeded,
2858    /// and the external file was read. The fix routes every candidate through the
2859    /// symlink-resolving `ensure_path_within_store`, so these resolve to NO
2860    /// out-of-store node — exactly like the `..` escape control above. A legitimate
2861    /// in-store link still resolves, proving the gate did not over-block.
2862    #[cfg(unix)]
2863    #[test]
2864    fn symlinked_normal_component_does_not_disclose_out_of_store_file() {
2865        use std::os::unix::fs::symlink;
2866
2867        let fx = Fixture::new();
2868        // The secret lives OUTSIDE the store root, as a sibling of it.
2869        let outside_dir = fx.store.root.parent().unwrap().join("secret");
2870        fs::create_dir_all(&outside_dir).unwrap();
2871        fs::write(
2872            outside_dir.join("secret.md"),
2873            "---\ntype: contact\nsummary: TOP SECRET\n---\n# x\n",
2874        )
2875        .unwrap();
2876
2877        // A legitimate in-store target, to prove the gate does not over-block.
2878        fx.write("records/contacts/real.md", "contact", "Real Contact", "");
2879
2880        // (a) symlinked DIRECTORY component: records/linkdir -> <outside>/secret
2881        symlink(&outside_dir, fx.store.root.join("records/linkdir")).unwrap();
2882        fx.write(
2883            "records/contacts/seed.md",
2884            "contact",
2885            "Seed",
2886            "[[records/linkdir/secret]] and the in-store [[records/contacts/real]].",
2887        );
2888
2889        // (b) directly-symlinked .md: records/aliased.md -> <outside>/secret.md
2890        symlink(
2891            outside_dir.join("secret.md"),
2892            fx.store.root.join("records/aliased.md"),
2893        )
2894        .unwrap();
2895        fx.write(
2896            "records/contacts/seed2.md",
2897            "contact",
2898            "Seed2",
2899            "[[records/aliased]]",
2900        );
2901        fx.reindex();
2902
2903        // (a): the symlinked-dir target must NOT appear; the in-store link must.
2904        let slice = neighborhood(
2905            &fx.store,
2906            &fx.p("records/contacts/seed.md"),
2907            1,
2908            &[],
2909            Direction::Outgoing,
2910        )
2911        .unwrap();
2912        assert!(
2913            !slice.nodes.iter().any(|n| n.summary == "TOP SECRET"),
2914            "a symlinked-dir component must not disclose the out-of-store summary: {:?}",
2915            slice.nodes
2916        );
2917        assert!(
2918            !slice
2919                .nodes
2920                .iter()
2921                .any(|n| n.path.to_string_lossy().contains("linkdir")),
2922            "the symlinked-out-of-store target must not be a node: {:?}",
2923            slice.nodes
2924        );
2925        assert!(
2926            slice
2927                .nodes
2928                .iter()
2929                .any(|n| n.path == fx.p("records/contacts/real")),
2930            "the legitimate in-store link must still resolve (gate did not over-block): {:?}",
2931            slice.nodes
2932        );
2933
2934        // (b): the directly-symlinked .md target must NOT disclose anything.
2935        let slice2 = neighborhood(
2936            &fx.store,
2937            &fx.p("records/contacts/seed2.md"),
2938            1,
2939            &[],
2940            Direction::Outgoing,
2941        )
2942        .unwrap();
2943        assert!(
2944            slice2.nodes.is_empty(),
2945            "a directly-symlinked .md pointing outside the store must yield no node: {:?}",
2946            slice2.nodes
2947        );
2948    }
2949
2950    #[test]
2951    fn regression_non_utf8_linker_edges_survive_scoped_backlinks_and_orphans() {
2952        // Adversarial review #10: a content file with a stray non-UTF8 byte beside
2953        // a valid ASCII `[[...]]` line must still expose its edges. The unscoped
2954        // backlink scanner reads bytes lossily, but `forwardlinks`/`orphans` used
2955        // `read_to_string` and dropped EVERY edge on `InvalidData` — so scoped
2956        // backlinks under-reported vs unscoped, and `orphans` flagged BOTH
2957        // endpoints of a live edge.
2958        let fx = Fixture::new();
2959        fx.write("records/contacts/sarah.md", "contact", "Sarah", "# Sarah");
2960        // bio.md: valid UTF-8 frontmatter, but a BODY line with a 0xE9 byte
2961        // (Latin-1 'é', invalid as standalone UTF-8) beside the link to sarah.
2962        let mut bytes: Vec<u8> = Vec::new();
2963        bytes.extend_from_slice(
2964            b"---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Bio\n---\n",
2965        );
2966        bytes.extend_from_slice(b"See [[records/contacts/sarah]] caf");
2967        bytes.push(0xE9);
2968        bytes.extend_from_slice(b"\n");
2969        let bio_abs = fx.store.root.join("records/profiles/bio.md");
2970        fs::create_dir_all(bio_abs.parent().unwrap()).unwrap();
2971        fs::write(&bio_abs, &bytes).unwrap();
2972        fx.reindex();
2973
2974        let sarah = fx.p("records/contacts/sarah");
2975
2976        // forwardlinks reads the non-UTF8 file and still finds the edge.
2977        let fwd = paths(&forwardlinks(&fx.store, &fx.p("records/profiles/bio")).unwrap());
2978        assert!(
2979            fwd.iter().any(|p| p.contains("sarah")),
2980            "forwardlinks must extract the edge from a non-UTF8 file: {fwd:?}"
2981        );
2982
2983        // Scoped backlinks (rides `forwardlinks`) must AGREE with unscoped.
2984        let unscoped = paths(&backlinks(&fx.store, &sarah).unwrap());
2985        let scoped =
2986            paths(&backlinks_filtered(&fx.store, &sarah, &["profile".to_string()], None).unwrap());
2987        assert!(
2988            unscoped.iter().any(|p| p.contains("bio")),
2989            "unscoped backlinks must include bio: {unscoped:?}"
2990        );
2991        assert!(
2992            scoped.iter().any(|p| p.contains("bio")),
2993            "scoped backlinks must agree with unscoped on the non-UTF8 linker: {scoped:?}"
2994        );
2995
2996        // Neither endpoint of the live edge may be reported as an orphan.
2997        let orph = paths(&orphans(&fx.store, None).unwrap());
2998        assert!(
2999            !orph
3000                .iter()
3001                .any(|p| p.contains("bio") || p.contains("sarah")),
3002            "neither endpoint of a live edge may be an orphan: {orph:?}"
3003        );
3004    }
3005}