dbmd_core/graph.rs
1//! `graph` — the wiki-link **relationship layer**.
2//!
3//! Wiki-links are curated-relevance edges (the LLM wrote them), so the graph's
4//! job is to **assemble the relevant context around a seed**, not to be
5//! analyzed. **All ops are on-demand — there is no maintained graph** (a
6//! persistent graph is the roadmap engine).
7//!
8//! [`backlinks`] / [`forwardlinks`] are loop ops (O(changed), never O(store)).
9//! [`neighborhood`] is the high-value context-hydration op. [`orphans`] is a
10//! SWEEP curation worklist.
11//!
12//! Whole-graph analytics (connected components, cycle detection, shortest
13//! path, sinks/sources, DOT/JSON export) are deliberately **not** here — a
14//! human studying the graph opens the store in Obsidian; broken-link detection
15//! is [`crate::validate`]'s job (`WIKI_LINK_BROKEN`).
16//!
17//! ## Implementation note — two paths for the incoming-edge scan
18//!
19//! The scale contract (SPEC § Tooling, plan: *"the interactive loop is
20//! O(changed), never O(store)"*) is the load-bearing rule here. [`backlinks`]
21//! is a loop op, so it must **not** open and `read_to_string` every content file
22//! in the store on each call. It resolves incoming edges by one of two paths,
23//! chosen by whether the call is scoped:
24//!
25//! - **Unscoped** (`dbmd graph backlinks <x>`, no `--type`/`--in`): one
26//! embedded-ripgrep pass for the literal `[[<target>]]` over the tree, via
27//! [`Store::find_links_to`] (`grep` + `ignore`, early-exit per file) — the
28//! same scan engine [`crate::validate`]'s working-set incoming-linker step
29//! uses. A single store traversal with cheap presence-only matching, not N
30//! whole-file parses; that is what keeps the unscoped call inside the loop
31//! budget. [`backlinks`] then filters the raw hits to content files and emits
32//! canonical bare targets (its relationship view), where the lower-level
33//! [`Store::find_links_to`] returns every `.md` the text appears in.
//! - **Scoped** (`--type` / `--in`): the candidate set is enumerated from
//!   `index.jsonl` sidecars via [`Store::sidecar_records`] — only the named
//!   layer's sidecars when `--in` is given, store-wide sidecars for a type-only
//!   scope — filtered to the requested type(s), and each candidate is confirmed
//!   by a single-file parse. That is what makes `--type` / `--in` an *I/O*
//!   scope, not just a result filter: a layer-scoped `backlinks` reads only that
//!   layer's sidecars (O(entities-in-layer)), and every scoped call parses only
//!   the candidate files. A type's records can span several folders within its
//!   layer (a `profile` filed under any `records/<folder>/`, not only its
//!   canonical `records/profiles/`) and can even sit at the other layer's root
//!   as a loose file (a `note` filed directly under `records/`), so the read is
//!   never narrowed to a single canonical folder or to the type's canonical
//!   layer — otherwise those off-canonical linkers would be silently dropped.
45//!
46//! **Why the scoped path confirms by parsing the candidate, not by trusting the
47//! sidecar's `links` field.** A sidecar record's `links` is the file's
48//! *frontmatter* `links:` list only — it does **not** capture wiki-links written
49//! in the body or inside other typed frontmatter fields (`company: [[…]]`,
50//! `attendees: [ … ]`, `derived_from: [ … ]`). [`forwardlinks`] extracts edges
51//! from the whole file, so to keep the two directions on the **same** edge set
52//! (an incoming edge to X is exactly: some file whose [`forwardlinks`] contains
53//! X) the incoming-edge confirmation re-parses each candidate file the same way.
54//! The sidecar bounds *which* files are candidates; the parse decides whether
55//! each truly links. The unscoped ripgrep path stays on that same edge set by
56//! matching the link text wherever it lives in the file (frontmatter or body).
57//! A node's `summary` / `type` likewise read frontmatter directly (the source of
58//! truth the sidecar is derived from; never stale).
59
60use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
61use std::path::{Path, PathBuf};
62
63use ignore::WalkBuilder;
64
65use crate::index::IndexRecord;
66use crate::store::{
67 canonical_link_target, ensure_path_within_store, extract_edge_targets, fence_closes,
68 fence_opens, link_edge_key, Layer, Store, StoreError,
69};
70
/// Selects which wiki-link edges a graph traversal walks from a node.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Direction {
    /// Walk only the edges that point *at* the node (its backlinks).
    Incoming,
    /// Walk only the edges that leave the node (its forwardlinks).
    Outgoing,
    /// Walk incoming and outgoing edges alike.
    Both,
}
81
82/// One node reached during a [`neighborhood`] hydration: the file, its
83/// `summary`, and how it connects back toward the seed.
84#[derive(Debug, Clone, PartialEq, Eq)]
85pub struct ContextNode {
86 /// The store-relative path of the reached file.
87 pub path: PathBuf,
88 /// The file's `summary` (read from its sidecar entry / frontmatter).
89 pub summary: String,
90 /// The file's `type`, when known.
91 pub type_: Option<String>,
92 /// Hop distance from the seed (the seed itself is 0).
93 pub hops: u32,
94 /// The relationship edge that brought this node into the slice: the path it
95 /// links to/from one hop closer to the seed, and the direction.
96 pub via: Option<(PathBuf, Direction)>,
97}
98
99/// The readable working-set digest [`neighborhood`] returns: the seed plus the
100/// reached nodes with their summaries and connections. The relationship-axis
101/// "turn a seed into context" primitive.
102#[derive(Debug, Clone, PartialEq, Eq)]
103pub struct ContextSlice {
104 /// The seed the slice was hydrated from.
105 pub seed: PathBuf,
106 /// The reached nodes (excluding the seed), in BFS order.
107 pub nodes: Vec<ContextNode>,
108}
109
110/// Incoming edges to `path`: files that wiki-link to it. The blast-radius /
111/// dependents primitive before an edit. Store-wide (every layer / every type);
112/// see [`backlinks_filtered`] for the `--type` / `--in`-scoped form.
113///
114/// `path` is the store-relative target as it would be written inside a
115/// wiki-link (with or without a trailing `.md`; both resolve to the same
116/// target). Returns each linking file as its **canonical bare wiki-link path**
117/// (store-relative, no `.md`) — the same key [`forwardlinks`] emits, so the two
118/// directions round-trip and [`neighborhood`] can use one node identity.
119/// Deduped, sorted, never including the seed itself.
120pub fn backlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
121 backlinks_filtered(store, path, &[], None)
122}
123
124/// Incoming edges to `path`, scoped by the linking file's `type` and/or layer —
125/// the `dbmd graph backlinks --type/--in` surface.
126///
127/// **Scale (the loop contract).** Two paths, by whether the call is scoped:
128///
129/// - **Unscoped** (`types` empty *and* `layer` `None`): one embedded-ripgrep
130/// pass for `[[<target>]]` across the store via [`Store::find_links_to`] — a
131/// single `grep` + `ignore` traversal with early-exit per file, never a
132/// `read_to_string` of every content file. This is the same scan engine
133/// [`crate::validate::validate_working_set`]'s incoming-linker step rides, and
134/// it keeps the unscoped call inside the loop budget (the old per-candidate
135/// confirm-read re-opened every file in the store → O(store)).
136/// - **Scoped** (`types` and/or `layer` set): the candidate set — the files that
137/// *might* link to `path` — is read from `index.jsonl` sidecars (never a
138/// content-tree walk). With a `--in <layer>` the read touches only that layer:
139/// O(entities-in-layer), the sanctioned loop cost. A type-only scope (no `--in`)
140/// reads store-wide sidecars and filters by `type`, exactly as
141/// [`crate::query::Query::execute`] does — so a record of the type filed under a
142/// non-canonical folder of its layer (a `profile` under any `records/<folder>/`)
143/// *and* a **loose file** of the type filed at the *other* layer's root (a `note`
144/// filed directly under `records/`, catalogued in `records/index.jsonl`) are both
145/// candidates. Each candidate is then confirmed by a single-file parse.
146///
147/// **Correctness (one edge set, both paths).** An incoming edge to X is exactly:
148/// some file whose [`forwardlinks`] contains X — a wiki-link in the body or in
149/// *any* frontmatter field (`company: [[…]]`, `attendees: [ … ]`), not just the
150/// sidecar's frontmatter `links:` projection. Both paths honor that:
151/// - The unscoped scan matches the literal `[[<target>]]` text wherever it lives
152/// in a file (frontmatter or body), the same edges [`forwardlinks`] extracts.
153/// [`Store::find_links_to`] returns *every* `.md` carrying the link text
154/// (including `index.md` catalogs); [`backlinks`] is the relationship view, so
155/// the results are filtered to content files ([`is_content_rel`]) and emitted
156/// as canonical bare targets, self-excluded.
157/// - The scoped path confirms each candidate via [`file_links_to`], which
158/// delegates to [`forwardlinks`] (body + every frontmatter field) — so a
159/// body-only or typed-field edge is caught, not just the sidecar's `links:`
160/// list.
161///
162/// Result form (canonical bare paths, deduped, sorted, seed excluded) is
163/// identical on both paths and matches [`backlinks`].
164pub fn backlinks_filtered(
165 store: &Store,
166 path: &Path,
167 types: &[String],
168 layer: Option<Layer>,
169) -> Result<Vec<PathBuf>, StoreError> {
170 let target = normalize_target(path);
171 if target.is_empty() {
172 return Ok(Vec::new());
173 }
174 let target_key = edge_key(&target);
175
176 // Unscoped: one content pass over the store (O(store) scan with early-exit
177 // per file), not a per-candidate read of every content file. `find_links_to`
178 // returns every `.md` carrying an edge to the target (incl. catalog
179 // `index.md`); narrow to content files and canonicalize to the bare target
180 // form `backlinks` emits, dropping the seed's self-link.
181 if types.is_empty() && layer.is_none() {
182 let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
183 for rel in store.find_links_to(path)? {
184 if !is_content_rel(&rel) {
185 continue;
186 }
187 let linker = normalize_target(&rel);
188 if linker.is_empty() || edge_key(&linker) == target_key {
189 // A file never counts as its own backlink (case-folded so a
190 // case-variant self-link is still excluded).
191 continue;
192 }
193 hits.insert(PathBuf::from(linker));
194 }
195 return Ok(hits.into_iter().collect());
196 }
197
198 // Scoped: read only the named folder(s)' sidecars for the candidate set, then
199 // confirm each candidate with a single-file parse — O(folder), the I/O scope
200 // `--type` / `--in` buys.
201 let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
202 for candidate in candidate_records(store, types, layer)? {
203 let rel = &candidate.path;
204 let candidate_target = normalize_target(rel);
205 if candidate_target.is_empty() || edge_key(&candidate_target) == target_key {
206 // A file never counts as its own backlink.
207 continue;
208 }
209 // Confirm the edge by parsing the candidate file the same way
210 // forwardlinks does (body + all frontmatter), so body/typed-field links
211 // are caught — the sidecar's `links` field alone would miss them.
212 if file_links_to(store, rel, &target)? {
213 hits.insert(PathBuf::from(candidate_target));
214 }
215 }
216
217 Ok(hits.into_iter().collect())
218}
219
220/// Outgoing edges from `path`: the wiki-link targets extracted from that single
221/// file. Loop-fast; follow the evidence chain.
222///
223/// `path` is the store-relative path of the file to read. Targets are returned
224/// as store-relative paths (bare, no `.md`), deduped and sorted; the file's
225/// links to itself are dropped. A missing file yields an empty list (a
226/// dangling seed has no outgoing edges to report — broken-link detection is
227/// [`crate::validate`]'s job).
228pub fn forwardlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
229 let self_key = edge_key(&normalize_target(path));
230 let abs = match resolve_existing(store, path) {
231 Some(a) => a,
232 None => return Ok(Vec::new()),
233 };
234 // Decode the body LOSSILY (bytes -> `from_utf8_lossy`): wiki-link syntax
235 // (`[[...]]`) is ASCII, so a non-UTF8 byte elsewhere on a line cannot hide an
236 // edge. This mirrors the unscoped backlink scanner
237 // ([`Store::find_links_to_any`], which reads bytes + lossy by design) so
238 // SCOPED backlinks (which ride `forwardlinks`) agree with unscoped backlinks
239 // on a Latin-1-imported file instead of silently dropping its edges — a
240 // `read_to_string` that errored on `InvalidData` returned NO edges.
241 let body = match std::fs::read(&abs) {
242 Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
243 Err(e) => return Err(StoreError::Io(e)),
244 };
245
246 let mut out: BTreeSet<PathBuf> = BTreeSet::new();
247 for target in extract_link_targets(&body) {
248 // Self-link drop is case-folded so a case-variant self-reference is also
249 // excluded on a case-insensitive filesystem.
250 if target.is_empty() || edge_key(&target) == self_key {
251 continue;
252 }
253 out.insert(PathBuf::from(target));
254 }
255 Ok(out.into_iter().collect())
256}
257
258/// The candidate set for an incoming-edge scan: the sidecar records that could
259/// link to the target, read from the `index.jsonl` sidecars (never a content-tree
260/// walk). `types`/`layer` narrow *which* sidecars are read — the I/O scope that
261/// keeps a typed/layer backlinks O(entities-in-layer) when a layer is named.
262///
263/// - `types` non-empty, `layer` given: read **only that layer's** sidecars
264/// (O(entities-in-layer)) and keep the records whose `type` is in `types`. The
265/// read is *not* short-circuited on a layer that disagrees with a type's
266/// canonical layer, because a record of that type may legitimately be filed
267/// there as a **loose file** (a `note` filed directly at `records/`, catalogued
268/// in `records/index.jsonl`); the `type` filter on the layer read is what keeps
269/// the result correct in either case.
270/// - `types` non-empty, `layer` `None`: read **store-wide** sidecars and keep the
271/// records whose `type` is in `types` — exactly what [`crate::query::Query::execute`]
272/// does for a type-only query. This is complete across every folder *and* every
273/// layer the type is filed under: its canonical-layer records (the common case)
274/// plus any loose file of that type filed at the *other* layer's root.
275/// - `types` empty: every sidecar record under `layer` (or store-wide when
276/// `None`) via [`Store::sidecar_records`].
277///
278/// **Why store-wide (not the type's one canonical layer) for the type-only case.**
279/// [`layer_for_type`](crate::store::layer_for_type) maps a type to exactly ONE
280/// layer (`note` → Sources, `contact`
281/// → Records), but a loose file (SPEC § Loose files) may legitimately be filed at
282/// the *other* layer's root and catalogued in that layer's `index.jsonl`. Reading
283/// only `layer_for_type(T)` would silently drop a records-loose `note` from
284/// `backlinks --type note`, and early-`continue`-ing on `--in records` (because
285/// `records` ≠ `layer_for_type(note)`) would return empty — diverging from the
286/// unscoped scan, from `--type T --in <layer>`, and from `dbmd query --type T`.
287/// Reading store-wide (or the named layer) and filtering by `type` is sidecar-backed
288/// (no content-tree walk) and keeps the scoped edge set equal to the unscoped one.
289/// A `type` can also span several folders within one layer — a conclusion `profile`
290/// filed under any `records/<folder>/`, not only `records/profiles/` — and the
291/// store-wide/layer read covers that too.
292fn candidate_records(
293 store: &Store,
294 types: &[String],
295 layer: Option<Layer>,
296) -> Result<Vec<IndexRecord>, StoreError> {
297 if types.is_empty() {
298 return store.sidecar_records(layer);
299 }
300 let want: HashSet<&str> = types.iter().map(|s| s.as_str()).collect();
301 // A layer scope reads only that layer's sidecars (O(entities-in-layer)); with
302 // no layer, read store-wide so a loose file of the type filed at *either*
303 // layer's root is covered — matching `Query::execute`'s type-only candidate
304 // set. The `type` filter (not a per-type canonical-layer guess) is what makes
305 // both correct, so a loose `note` under `records/` is found and a `note` under
306 // `sources/` is excluded when `--in records`.
307 let mut by_path: std::collections::BTreeMap<PathBuf, IndexRecord> =
308 std::collections::BTreeMap::new();
309 for rec in store.sidecar_records(layer)? {
310 if want.contains(rec.type_.as_str()) {
311 by_path.insert(rec.path.clone(), rec);
312 }
313 }
314 Ok(by_path.into_values().collect())
315}
316
317/// True if the store file at `rel` carries a wiki-link whose canonical target
318/// equals `target`. Delegates to [`forwardlinks`] so the incoming-edge predicate
319/// is *exactly* the outgoing-edge extraction — body + every frontmatter field —
320/// keeping the two directions on one edge set. `forwardlinks` already emits
321/// canonical bare targets, so `target` (likewise normalized by the caller) is
322/// compared directly. A missing/binary file links to nothing.
323fn file_links_to(store: &Store, rel: &Path, target: &str) -> Result<bool, StoreError> {
324 let edges = forwardlinks(store, rel)?;
325 let target_key = edge_key(target);
326 // Compare on the case-folded edge key so a case-variant link (e.g.
327 // `[[records/contacts/Sarah-Chen]]` to `sarah-chen.md`) is confirmed on a
328 // case-insensitive filesystem, agreeing with the unscoped scan and validate.
329 Ok(edges
330 .iter()
331 .any(|e| edge_key(&e.to_string_lossy()) == target_key))
332}
333
334/// **Context hydration.** Bounded BFS from `seed` over backlinks + forwardlinks
335/// out to `hops`, reading each reached file's `summary` + relationship, and
336/// returning a readable [`ContextSlice`]. Optionally filtered by `types` and
337/// `direction`. On-demand; no maintained graph. What the agent reaches for to
338/// assemble a working set in one call.
339///
340/// Traversal semantics:
341/// - **`hops`** bounds true graph distance from the seed. `hops == 0` returns
342/// an empty slice (the seed alone is no context).
343/// - **`direction`** selects which edges are followed: `Incoming` walks
344/// backlinks, `Outgoing` walks forwardlinks, `Both` walks the union.
345/// - **`types`**, when non-empty, filters which reached nodes appear in the
346/// slice — but traversal still passes *through* off-type nodes, so a
347/// `meeting` two hops out is still reachable through a `contact` even when
348/// filtering to `meeting`. (An empty `types` slice imposes no filter.)
349/// - Each node records the lowest hop count at which it is first reached (BFS
350/// order); the seed is never included as a node.
351///
352/// Unbounded traversal: delegates to [`neighborhood_capped`] with no node cap, so
353/// it expands every reachable node within `hops`. For a densely-interlinked store
354/// this is one full-store backlinks scan **per reached node** (O(visited × store))
355/// — prefer [`neighborhood_capped`] with a `max_nodes` cap to bound that work.
356pub fn neighborhood(
357 store: &Store,
358 seed: &Path,
359 hops: u32,
360 types: &[String],
361 direction: Direction,
362) -> Result<ContextSlice, StoreError> {
363 neighborhood_capped(store, seed, hops, types, direction, None)
364}
365
366/// [`neighborhood`] with a hard cap on how many nodes the BFS **traverses**.
367///
368/// `max_nodes` bounds the *traversal*, not just the result: each node the BFS
369/// expands triggers a per-node incoming-edge scan (an unscoped [`backlinks`] is a
370/// full-store ripgrep pass), so an uncapped neighborhood of a hub node costs
371/// O(visited × store). A post-hoc `.take(n)` on the returned nodes caps the
372/// *output* but not that work — the scans still run for every reached node. This
373/// cap stops discovering (and therefore stops scanning) once `max_nodes` distinct
374/// non-seed nodes have entered the BFS, so the expensive per-node scans are bounded
375/// to at most `max_nodes` of them. `None` is unbounded (the [`neighborhood`]
376/// behavior).
377///
378/// The cap is applied at *discovery* in BFS order, so the kept nodes are exactly
379/// the first `max_nodes` reached (closest-first by hop), and each still records its
380/// true minimum hop distance. Type-filtered (off-type) nodes count against the cap
381/// because the BFS must still traverse *through* them to reach deeper on-type
382/// nodes — the scan cost is paid when a node is expanded, on- or off-type alike.
383pub fn neighborhood_capped(
384 store: &Store,
385 seed: &Path,
386 hops: u32,
387 types: &[String],
388 direction: Direction,
389 max_nodes: Option<usize>,
390) -> Result<ContextSlice, StoreError> {
391 let seed_rel = PathBuf::from(normalize_target(seed));
392 let type_filter: HashSet<&str> = types.iter().map(|s| s.as_str()).collect();
393
394 // `discovered` guards against revisiting a node (and against re-adding the
395 // seed). BFS by levels so the first time we reach a node is its true min
396 // hop distance.
397 let mut discovered: HashSet<PathBuf> = HashSet::new();
398 discovered.insert(seed_rel.clone());
399
400 let mut nodes: Vec<ContextNode> = Vec::new();
401 let mut frontier: VecDeque<PathBuf> = VecDeque::new();
402 frontier.push_back(seed_rel.clone());
403
404 // Count of distinct non-seed nodes admitted to the BFS. Once it hits
405 // `max_nodes` we stop discovering new nodes, which stops enqueuing them, which
406 // stops the per-node full-store backlinks scan they would have triggered — the
407 // cap bounds the *traversal cost*, not only the printed result.
408 let mut admitted = 0usize;
409 let cap_reached = |admitted: usize| max_nodes.is_some_and(|cap| admitted >= cap);
410
411 let mut hop = 0u32;
412 while hop < hops && !frontier.is_empty() && !cap_reached(admitted) {
413 hop += 1;
414 let level_size = frontier.len();
415 for _ in 0..level_size {
416 if cap_reached(admitted) {
417 break;
418 }
419 let current = frontier.pop_front().expect("frontier non-empty");
420
421 // Collect this node's edges in the requested direction(s). Each
422 // edge carries the neighbor path + the direction we traversed it.
423 let mut edges: Vec<(PathBuf, Direction)> = Vec::new();
424 if matches!(direction, Direction::Outgoing | Direction::Both) {
425 for nbr in forwardlinks(store, ¤t)? {
426 edges.push((nbr, Direction::Outgoing));
427 }
428 }
429 if matches!(direction, Direction::Incoming | Direction::Both) {
430 for nbr in backlinks(store, ¤t)? {
431 edges.push((nbr, Direction::Incoming));
432 }
433 }
434
435 for (neighbor, dir) in edges {
436 if cap_reached(admitted) {
437 break;
438 }
439 // Drop a neighbor that exists on disk but resolves OUTSIDE the
440 // store via a symlinked path component — it is not a real in-store
441 // edge, exactly as a `..` escape is dropped at edge extraction. This
442 // yields no node (and no traversal through it), closing the
443 // `graph neighborhood` disclosure vector at the graph boundary.
444 if target_escapes_store(store, &neighbor) {
445 continue;
446 }
447 if !discovered.insert(neighbor.clone()) {
448 continue;
449 }
450 admitted += 1;
451 let (summary, type_) = read_summary_and_type(store, &neighbor);
452 let include = type_filter.is_empty()
453 || type_
454 .as_deref()
455 .map(|t| type_filter.contains(t))
456 .unwrap_or(false);
457 if include {
458 nodes.push(ContextNode {
459 path: neighbor.clone(),
460 summary,
461 type_,
462 hops: hop,
463 via: Some((current.clone(), dir)),
464 });
465 }
466 // Off-type nodes are not emitted but still seed the next BFS
467 // level, so the type filter narrows the *result*, not the
468 // reachable graph.
469 frontier.push_back(neighbor);
470 }
471 }
472 }
473
474 Ok(ContextSlice {
475 seed: seed_rel,
476 nodes,
477 })
478}
479
480/// **SWEEP.** Content files with no incoming AND no outgoing wiki-links — the
481/// curation worklist ("ingested but not yet wired into the wiki"). Off the
482/// loop. Optionally scoped to a layer.
483///
484/// A file is an orphan iff it neither links out to another store file nor is
485/// linked to by one. Incoming edges are counted across the *whole* store
486/// (a link from any layer un-orphans a file), even when `layer` scopes the
487/// candidate set. Returns store-relative paths, sorted.
488pub fn orphans(store: &Store, layer: Option<Layer>) -> Result<Vec<PathBuf>, StoreError> {
489 // One walk of the whole store: for every content file, record (a) whether
490 // it has any outgoing link, and (b) accumulate the set of every target any
491 // file links to (its incoming-edge set). Both come from a single read per
492 // file — the SWEEP cost.
493 let all = walk_content_files(store)?;
494
495 // Every walked content file's edge KEY (NFC-folded, `.md`-stripped). A
496 // wiki-link counts as a live incoming/outgoing edge when it resolves on disk
497 // OR its edge key matches a walked file's. The key match is what makes a
498 // cross-NORMALIZATION link a real edge on a byte-exact filesystem: an NFD
499 // link to an NFC-named file (or vice versa) does NOT satisfy
500 // `resolve_existing`'s `is_file` on Linux (the bytes differ), though it does
501 // on macOS/APFS (which folds NFC/NFD). `link_edge_key` NFC-folds both sides,
502 // so the keys agree on every platform — without this, `orphans` flagged a
503 // live cross-normalization target as an orphan on Linux while macOS hid it.
504 let content_keys: HashSet<String> = all
505 .iter()
506 .filter_map(|abs| rel_path(store, abs))
507 .map(|rel| edge_key(&normalize_target(&rel)))
508 .collect();
509
510 // `linked_to` holds case-folded edge KEYS (not raw paths): the link text may
511 // spell a target with different casing than the on-disk file (e.g.
512 // `[[records/contacts/Sarah-Chen]]` → `sarah-chen.md`), and on a
513 // case-insensitive filesystem that is a real incoming edge. Keying on
514 // `edge_key` so the incoming-edge lookup case-folds is what stops the
515 // false-positive orphan (a file with a live case-variant link reported as
516 // orphaned) — and matches validate, which resolves the same link via the
517 // case-insensitive filesystem.
518 let mut linked_to: HashSet<String> = HashSet::new();
519 let mut has_outgoing: HashMap<PathBuf, bool> = HashMap::new();
520
521 for abs in &all {
522 let rel = match rel_path(store, abs) {
523 Some(r) => r,
524 None => continue,
525 };
526 let self_key = edge_key(&normalize_target(&rel));
527
528 // Lossy decode (see `forwardlinks`): a non-UTF8 byte must not hide a
529 // `[[...]]` edge, or `orphans` would over-report BOTH endpoints of a live
530 // edge as orphans (and `stats` would inflate the orphan count) on a file
531 // with a stray Latin-1 byte beside a valid ASCII link line.
532 let body = match std::fs::read(abs) {
533 Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
534 Err(e) => return Err(StoreError::Io(e)),
535 };
536
537 let mut outgoing = false;
538 for target in extract_link_targets(&body) {
539 if target.is_empty() || edge_key(&target) == self_key {
540 continue;
541 }
542 // A live edge: resolves on disk (handles raw `.eml`/`.pdf` sources and
543 // store containment) OR matches a walked content file by NFC-folded
544 // key (the cross-normalization case `resolve_existing` misses on a
545 // byte-exact filesystem).
546 if resolve_existing(store, Path::new(&target)).is_none()
547 && !content_keys.contains(&edge_key(&target))
548 {
549 continue;
550 }
551 outgoing = true;
552 linked_to.insert(edge_key(&target));
553 }
554 has_outgoing.insert(rel, outgoing);
555 }
556
557 let mut out: BTreeSet<PathBuf> = BTreeSet::new();
558 for abs in &all {
559 let rel = match rel_path(store, abs) {
560 Some(r) => r,
561 None => continue,
562 };
563 if let Some(layer) = layer {
564 if path_layer(&rel) != Some(layer) {
565 continue;
566 }
567 }
568 let outgoing = has_outgoing.get(&rel).copied().unwrap_or(false);
569 let incoming = linked_to.contains(&edge_key(&normalize_target(&rel)));
570 if !outgoing && !incoming {
571 out.insert(rel);
572 }
573 }
574
575 Ok(out.into_iter().collect())
576}
577
578/// **Write-side.** Rewrite every incoming `[[old]]` wiki-link in `text` to
579/// `[[new]]`, preserving any `|display` override and emitting the canonical bare
580/// target (no `.md`). The write-side twin of [`backlinks`]: where `backlinks`
581/// *finds* the files carrying an edge to `old`, this *retargets* that edge to
582/// `new` inside one file's contents.
583///
584/// `old` and `new` are store-relative paths in the wiki-link sense — both are
585/// passed through the same [`normalize_target`] the read side keys on, so the
586/// `.md` and bare spellings of `old` collapse to one target and a match here is
587/// exactly a match [`backlinks`] / [`Store::find_links_to`](crate::Store::find_links_to)
588/// would report. A link is rewritten iff its normalized target equals
589/// `normalize_target(old)`; prefix collisions (`old=a/b` vs `[[a/bc]]`) and
590/// short-form links never match. Returns the rewritten text (identical to the
591/// input when nothing matched), so the caller can cheaply detect a no-op.
592///
593/// Operates on the raw text (not a parser round-trip) so a link in frontmatter
594/// or body is retargeted uniformly and nothing else is reflowed — **except** a
595/// `[[...]]` inside a ``` fenced code block, which is a documentation example,
596/// not an edge: `rename` must NOT mutate fenced verbatim content (validate
597/// treats fenced links as non-edges, so rewriting them silently corrupts the
598/// example and makes rename disagree with validate). Matching is fence-aware,
599/// whitespace-trimmed, and case-folded to the filesystem, the exact edge notion
600/// [`backlinks`]/[`forwardlinks`] use — so rename retargets precisely the edges
601/// those report and nothing else.
602pub fn rewrite_links_to(text: &str, old: &Path, new: &Path) -> String {
603 let old_target = normalize_target(old);
604 let new_target = normalize_target(new);
605 if old_target.is_empty() {
606 // No target to match → never rewrite anything.
607 return text.to_string();
608 }
609 let old_key = edge_key(&old_target);
610
611 let mut out = String::with_capacity(text.len());
612
613 // Split off the leading `---`…`---` frontmatter block exactly like the read
614 // side ([`Store::extract_edge_targets`] via `split_frontmatter_raw`): the
615 // frontmatter is YAML, NOT markdown — it has no code fences, and a `[[…]]`
616 // in any frontmatter field is a real edge. So the frontmatter region is
617 // rewrite-scanned WITHOUT fence tracking, and the body is rewrite-scanned
618 // with a FRESH fence state. Without this boundary reset, a stray ``` / `~~~`
619 // inside a frontmatter block scalar opens a fence that persists into the
620 // body, so every body `[[…]]` is treated as fenced and silently skipped —
621 // leaving a dangling link after rename even though `backlinks`/`forwardlinks`
622 // (which DO reset at this boundary) still report the body edge. Returns
623 // byte offsets so the `---` fence lines and everything else are copied
624 // byte-exact; the only mutation is a matched `[[…]]` retarget.
625 let body_start = match frontmatter_body_split(text) {
626 Some(body_offset) => {
627 // Frontmatter prefix = `0..body_offset` (the opening `---` line, the
628 // YAML, and the closing `---` line). Scan it line-by-line with
629 // rewriting on and NO fence state: the literal `---` fence lines
630 // never match link syntax (rewrite is a no-op on them), and any
631 // real `[[…]]` in a YAML field is retargeted.
632 for line in text[..body_offset].split_inclusive('\n') {
633 rewrite_links_in_line(line, &old_key, &new_target, &mut out);
634 }
635 body_offset
636 }
637 // No leading frontmatter block → the whole text is body.
638 None => 0,
639 };
640
641 // Body scan with a FRESH fence state. Track the fence as a `(byte, run
642 // length)` exactly like validate and `extract_edge_targets` (NOT a bool
643 // toggled on any ``` / ~~~ line). The naive toggle flips mid-block on a
644 // nested/indented/long-run fence, so a fenced example link would be
645 // rewritten — corrupting documentation and making rename disagree with
646 // validate's edge notion.
647 let mut fence: Option<(u8, usize)> = None;
648 // `split_inclusive` keeps each line's trailing `\n`, so copying a chunk
649 // verbatim preserves the original line endings exactly.
650 for line in text[body_start..].split_inclusive('\n') {
651 // The fence rules key on line content without trailing `\r`/`\n`; the
652 // full chunk (line endings intact) is what we copy verbatim.
653 let content = line.trim_end_matches('\n').trim_end_matches('\r');
654 if let Some(f) = fence {
655 // Inside a fenced code block: copy verbatim, never rewrite. Only a
656 // matching closing fence ends the block.
657 if fence_closes(content, f) {
658 fence = None;
659 }
660 out.push_str(line);
661 continue;
662 }
663 if let Some(opened) = fence_opens(content) {
664 fence = Some(opened);
665 out.push_str(line);
666 continue;
667 }
668 rewrite_links_in_line(line, &old_key, &new_target, &mut out);
669 }
670 out
671}
672
/// Locate the start of the body: the byte offset just past the closing `---`
/// line (including its line ending) of a leading frontmatter block.
///
/// Returns `None` when the text does not begin with a `---` line (after an
/// optional single UTF-8 BOM) or when no later line is exactly `---`; the
/// caller then treats the whole text as body. Both `\n` and `\r\n` line endings
/// are accepted. An offset is returned rather than slices so
/// [`rewrite_links_to`] can copy both regions byte-exact while scanning them
/// with different fence policies. This is a local mirror of the store's
/// frontmatter boundary detection, kept here so the module stays
/// self-contained alongside `frontmatter_block`.
fn frontmatter_body_split(text: &str) -> Option<usize> {
    // At most one leading BOM is tolerated.
    let sans_bom = text.strip_prefix('\u{feff}').unwrap_or(text);
    // The opening fence must be the very first line.
    let after_fence = sans_bom
        .strip_prefix("---\n")
        .or_else(|| sans_bom.strip_prefix("---\r\n"))?;

    // Offsets are relative to the ORIGINAL text, so start from however many
    // bytes (BOM + opening fence line) were consumed to reach `after_fence`.
    let mut body_offset = text.len() - after_fence.len();
    for line in after_fence.split_inclusive('\n') {
        body_offset += line.len();
        if line.trim_end_matches(['\r', '\n']) == "---" {
            // `body_offset` already includes the closing fence line itself.
            return Some(body_offset);
        }
    }
    None
}
708
709/// Rewrite every `[[...]]` on a single (non-fenced) line whose target matches
710/// `old_key`, appending the result to `out`. Preserves any `|display` override
711/// verbatim and emits the canonical bare `new_target`. A `[[...]]` whose target
712/// does not match (a prefix sibling, the short form, an unrelated target) is
713/// copied through untouched.
714fn rewrite_links_in_line(line: &str, old_key: &str, new_target: &str, out: &mut String) {
715 let bytes = line.as_bytes();
716 let mut i = 0usize;
717 let mut last = 0usize;
718 while i + 1 < bytes.len() {
719 if bytes[i] == b'[' && bytes[i + 1] == b'[' {
720 if let Some(close) = line[i + 2..].find("]]") {
721 let inner = &line[i + 2..i + 2 + close];
722 // An embedded newline means this isn't a single-line link.
723 if !inner.contains('\n') {
724 let (raw_target, display) = match inner.split_once('|') {
725 Some((t, d)) => (t, Some(d)),
726 None => (inner, None),
727 };
728 let raw_target = raw_target.trim();
729 // Match on the SAME edge key the read side uses, so `[[old]]`,
730 // `[[old.md]]`, `[[ ./old ]]`, and (case-insensitive FS)
731 // `[[Old]]` all retarget while `[[old-jr]]` never does.
732 if !raw_target.is_empty()
733 && !raw_target.starts_with('[')
734 && edge_key(&canonical_link_target(raw_target)) == old_key
735 {
736 out.push_str(&line[last..i]);
737 out.push_str("[[");
738 out.push_str(new_target);
739 if let Some(display) = display {
740 out.push('|');
741 out.push_str(display);
742 }
743 out.push_str("]]");
744 i = i + 2 + close + 2;
745 last = i;
746 continue;
747 }
748 }
749 // Not a matching link: skip past this `]]` so an inner `[[`
750 // isn't re-scanned, but leave the text for the verbatim copy.
751 i = i + 2 + close + 2;
752 continue;
753 }
754 }
755 i += 1;
756 }
757 out.push_str(&line[last..]);
758}
759
760// ── Private helpers ─────────────────────────────────────────────────────────
761
762/// Normalize a store-relative path into the canonical wiki-link target form:
763/// forward slashes, no leading `./` or `/`, and no trailing `.md`. This is the
764/// canonical (case-PRESERVING) identity used for output and rewrites; edge
765/// *comparisons* go through [`edge_key`] so the `.md`/bare forms AND (on a
766/// case-insensitive filesystem) case-variant spellings of a target unify. The
767/// shared [`canonical_link_target`] is the single definition every db.md
768/// link op keys on.
769fn normalize_target(path: &Path) -> String {
770 canonical_link_target(&path.to_string_lossy())
771}
772
773/// The comparison key for an edge: the canonical target case-folded to the
774/// filesystem (identity on a case-sensitive FS, lowercased on macOS/Windows), so
775/// the string-keyed graph compares agree with the filesystem's case-insensitive
776/// `is_file()` resolution. `[[records/contacts/Sarah-Chen]]` and the on-disk
777/// `sarah-chen.md` must be the same edge on a case-insensitive filesystem or
778/// backlinks/orphans/rename silently disagree with validate.
779fn edge_key(canonical_target: &str) -> String {
780 link_edge_key(canonical_target)
781}
782
783/// Extract every wiki-link target from a body, normalized to the canonical
784/// store-relative form. Fence-aware and whitespace-trimmed via the shared
785/// [`extract_edge_targets`] — a `[[...]]` inside a ``` fenced code block is a
786/// documentation example, NOT an edge (matching validate), and `[[ x ]]`
787/// padding resolves identically to `[[x]]`. A target that would escape the store
788/// root (a `..` component) is dropped here too, so an escaping `[[../outside/x]]`
789/// is never reported as a forward edge and never seeds a [`neighborhood`]
790/// traversal out of the store (the disclosure vector validate flags as an
791/// error). Order-preserving; duplicates kept (callers dedup).
792fn extract_link_targets(body: &str) -> Vec<String> {
793 extract_edge_targets(body)
794 .into_iter()
795 .filter(|t| is_within_store_target(t))
796 .collect()
797}
798
/// Whether a canonical target stays inside the store.
///
/// The target is accepted only when every path component is a plain `Normal`
/// name. Canonicalization has already removed a leading `./` or `/`, so the
/// component this guards against in practice is `..` (`ParentDir`) — an escape
/// out of the store root, rejected here in the same spirit as validate's
/// safe-path guard. Any other non-`Normal` component is rejected too.
fn is_within_store_target(target: &str) -> bool {
    for part in Path::new(target).components() {
        match part {
            std::path::Component::Normal(_) => {}
            // `..`, a root, a prefix, or a leading `.`: not a safe key.
            _ => return false,
        }
    }
    true
}
808
809/// Resolve the store root + a store-relative path to the absolute on-disk file,
810/// trying the path as written and then with a `.md` extension. `None` if neither
811/// exists **or if the target resolves outside the store root** — a `..`-laden or
812/// symlink-escaping wiki-link must never turn a graph read/traversal into a read
813/// of an arbitrary file outside the store (the `dbmd graph neighborhood`
814/// disclosure vector). Containment is enforced via the shared
815/// [`ensure_path_within_store`] gate, matching validate's safe-path guard.
816fn resolve_existing(store: &Store, store_relative: &Path) -> Option<PathBuf> {
817 let direct = store.root.join(store_relative);
818 if direct.is_file() && resolves_within_store(store, &direct) {
819 return Some(direct);
820 }
821 let normalized = normalize_target(store_relative);
822 let with_md = store.root.join(format!("{normalized}.md"));
823 if with_md.is_file() && resolves_within_store(store, &with_md) {
824 return Some(with_md);
825 }
826 None
827}
828
829/// True if a store-relative wiki-link target exists on disk but **resolves
830/// outside the store** — i.e. some `Normal` component is a symlink redirecting to
831/// an external dir/file (`records/linkdir/secret` through `records/linkdir ->
832/// /external`, or a directly-symlinked `records/aliased.md -> /external/x.md`).
833///
834/// This is the symlink twin of the `..` escape that [`is_within_store_target`]
835/// drops at edge *extraction*: a `..` target is rejected by its spelling, but a
836/// symlink escape is spelled with only `Normal` components and can only be caught
837/// by resolving the path. [`neighborhood_capped`] uses this to drop such a
838/// neighbor from the traversal entirely, so an escaping symlink yields **no node**
839/// (matching the `..` control) rather than a phantom node whose summary/type are
840/// blanked — closing the `graph neighborhood` disclosure vector at the graph
841/// boundary, not only at the file read.
842///
843/// A genuinely *dangling* in-store link (a target that exists nowhere) is **not**
844/// an escape: it does not resolve on disk at all, so this returns `false` and the
845/// dangling target is still surfaced as a node (existing behavior; broken-link
846/// reporting is [`crate::validate`]'s job).
847fn target_escapes_store(store: &Store, store_relative: &Path) -> bool {
848 // Already in-store-resolvable → not an escape.
849 if resolve_existing(store, store_relative).is_some() {
850 return false;
851 }
852 // Not resolvable in-store: is it because it points OUTSIDE (a symlink escape),
853 // or because it does not exist at all (a dangling link)? It escapes iff the
854 // path (as written or with `.md`) exists on disk yet fails containment.
855 let direct = store.root.join(store_relative);
856 if direct.exists() && !resolves_within_store(store, &direct) {
857 return true;
858 }
859 let normalized = normalize_target(store_relative);
860 let with_md = store.root.join(format!("{normalized}.md"));
861 with_md.exists() && !resolves_within_store(store, &with_md)
862}
863
864/// Containment check for a candidate on-disk path. Always routes through the
865/// authoritative, symlink-resolving [`ensure_path_within_store`] gate — the only
866/// thing that can prove an escaping or symlink-redirected path actually stays
867/// inside the store.
868///
869/// There is deliberately **no** "all-`Normal`-components" fast path that returns
870/// `true` without canonicalizing. A `Normal` component is not safe by spelling:
871/// it can itself be a symlink to a directory or file outside the store
872/// (`records/linkdir -> /etc`, or a directly-symlinked `records/aliased.md ->
873/// ../../outside/secret.md`). `store.root.join(rel)` follows that in-store symlink,
874/// `is_file()` succeeds (it follows symlinks), and without canonicalizing the
875/// resolved target the out-of-store file's `summary`/`type` leak into a
876/// `graph neighborhood` slice. `ensure_path_within_store` canonicalizes `abs`
877/// (resolving every symlink in its chain) and confirms the result is under the
878/// canonicalized root, closing that disclosure vector — the same gate the `..`
879/// path already passes through.
880fn resolves_within_store(store: &Store, abs: &Path) -> bool {
881 ensure_path_within_store(&store.root, abs).is_ok()
882}
883
884/// Convert an absolute path under the store root into its store-relative form.
885fn rel_path(store: &Store, abs: &Path) -> Option<PathBuf> {
886 abs.strip_prefix(&store.root).ok().map(|p| p.to_path_buf())
887}
888
889/// Which layer a store-relative path sits in, by its first component.
890fn path_layer(rel: &Path) -> Option<Layer> {
891 let first = rel.components().next()?;
892 match first.as_os_str().to_str()? {
893 "sources" => Some(Layer::Sources),
894 "records" => Some(Layer::Records),
895 _ => None,
896 }
897}
898
899/// True if a store-relative path is a *content* file: under `sources/` or
900/// `records/`, a `.md` file, and not an `index.md`. Meta files
901/// (`DB.md`, `log.md`, `log/…`, sidecars) are excluded.
902fn is_content_rel(rel: &Path) -> bool {
903 if path_layer(rel).is_none() {
904 return false;
905 }
906 match rel.extension().and_then(|e| e.to_str()) {
907 Some("md") => {}
908 _ => return false,
909 }
910 rel.file_name().and_then(|n| n.to_str()) != Some("index.md")
911}
912
913/// Walk every content `.md` file in the store via the **`ignore`** walker
914/// (the ripgrep directory engine). Only the two layer roots
915/// (`sources/`/`records/`) are descended, so `DB.md`, `log.md`, and
916/// `log/` at the store root are structurally never reached; hidden dirs and
917/// per-folder `index.md` sidecars are filtered out ([`is_content_rel`]). Honors
918/// `.gitignore` the way `rg` does. Returns absolute paths. SWEEP-class.
919fn walk_content_files(store: &Store) -> Result<Vec<PathBuf>, StoreError> {
920 let mut out = Vec::new();
921 for layer in Layer::all() {
922 let dir = store.root.join(layer_dir_name(layer));
923 if !dir.is_dir() {
924 continue;
925 }
926 let walker = WalkBuilder::new(&dir)
927 .hidden(true)
928 .git_ignore(true)
929 .git_global(false)
930 .require_git(false)
931 // Follow symlinks so a symlinked `.md` content file or a symlinked
932 // type folder is walked like any other content (consistent with the
933 // store SWEEP walker), rather than silently vanishing from orphans.
934 .follow_links(true)
935 .build();
936 for result in walker {
937 let entry = result.map_err(|e| StoreError::Search {
938 root: store.root.clone(),
939 message: format!("walk failed: {e}"),
940 })?;
941 // A followed symlink entry reports its own type as `is_symlink()`, so
942 // also accept a symlink whose target is a regular file.
943 let is_file = match entry.file_type() {
944 Some(ft) if ft.is_file() => true,
945 Some(ft) if ft.is_symlink() => std::fs::metadata(entry.path())
946 .map(|m| m.is_file())
947 .unwrap_or(false),
948 _ => false,
949 };
950 if !is_file {
951 continue;
952 }
953 let abs = entry.into_path();
954 if let Some(rel) = rel_path(store, &abs) {
955 if is_content_rel(&rel) {
956 out.push(abs);
957 }
958 }
959 }
960 }
961 Ok(out)
962}
963
964/// The on-disk folder name for a layer. Mirrors `Layer::dir_name`; kept local
965/// so the graph module owns its own copy rather than coupling to that body.
966fn layer_dir_name(layer: Layer) -> &'static str {
967 match layer {
968 Layer::Sources => "sources",
969 Layer::Records => "records",
970 }
971}
972
973/// Read a reached node's `summary` and `type` from its frontmatter. A missing
974/// file, missing frontmatter, or unparseable YAML degrades to an empty summary
975/// / unknown type rather than failing the whole hydration — `neighborhood` is
976/// best-effort context assembly, not validation.
977fn read_summary_and_type(store: &Store, rel: &Path) -> (String, Option<String>) {
978 let abs = match resolve_existing(store, rel) {
979 Some(a) => a,
980 None => return (String::new(), None),
981 };
982 // Lossy decode so a node's summary/type still resolve when the file carries
983 // a stray non-UTF8 byte (consistent with the edge readers above).
984 let text = match std::fs::read(&abs) {
985 Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
986 Err(_) => return (String::new(), None),
987 };
988 let yaml = match frontmatter_block(&text) {
989 Some(y) => y,
990 None => return (String::new(), None),
991 };
992 let value: serde_norway::Value = match serde_norway::from_str(yaml) {
993 Ok(v) => v,
994 Err(_) => return (String::new(), None),
995 };
996 let summary = value
997 .get("summary")
998 .and_then(|v| v.as_str())
999 .unwrap_or("")
1000 .to_string();
1001 let type_ = value
1002 .get("type")
1003 .and_then(|v| v.as_str())
1004 .map(|s| s.to_string());
1005 (summary, type_)
1006}
1007
/// Slice out the YAML that sits between the opening and closing `---` lines
/// (both fence lines excluded).
///
/// Yields `None` when the text does not start with a `---` line (after an
/// optional single UTF-8 BOM) or when no later line is exactly `---`. `\n` and
/// `\r\n` endings are both accepted. Local mirror of the parser's split so the
/// graph module stays self-contained.
fn frontmatter_block(text: &str) -> Option<&str> {
    // At most one leading BOM is tolerated.
    let text = text.strip_prefix('\u{feff}').unwrap_or(text);
    let rest = match text.strip_prefix("---\n") {
        Some(after) => after,
        None => text.strip_prefix("---\r\n")?,
    };

    // Accumulate the length of every line that precedes the closing fence.
    let mut yaml_len = 0usize;
    for line in rest.split_inclusive('\n') {
        match line.trim_end_matches(['\r', '\n']) {
            "---" => return Some(&rest[..yaml_len]),
            _ => yaml_len += line.len(),
        }
    }
    None
}
1028
1029#[cfg(test)]
1030mod tests {
1031 use super::*;
1032 use std::fs;
1033 use tempfile::TempDir;
1034
1035 use crate::parser::Config;
1036
1037 // ── Fixture builder ─────────────────────────────────────────────────────
1038 //
1039 // A real on-disk store in a tempdir. We write actual files (frontmatter +
1040 // wiki-links) and exercise the real code paths. The fixture constructs the
1041 // `Store` by its public fields rather than `Store::open`, so the graph
1042 // tests stand on their own and do not depend on any other module's
1043 // behavior. Each test asserts the behavior the SPEC promises, derived from
1044 // intent, never from echoing the function's own output.
1045 //
1046 // `backlinks` (and `neighborhood` in any incoming direction) enumerate their
1047 // candidate set from the type-folder `index.jsonl` sidecars — the loop
1048 // contract: never a whole-store content walk. A real db.md store maintains
1049 // those sidecars write-through, so a test that exercises backlinks must call
1050 // [`Fixture::reindex`] after writing its files to build them (the SWEEP that
1051 // `dbmd index rebuild` runs). Forwardlinks/orphans read content directly and
1052 // need no sidecar.
1053
1054 struct Fixture {
1055 _tmp: TempDir,
1056 store: Store,
1057 }
1058
1059 impl Fixture {
1060 fn new() -> Self {
1061 let tmp = TempDir::new().expect("tempdir");
1062 let root = tmp.path().to_path_buf();
1063 fs::write(root.join("DB.md"), "---\ntype: db-md\n---\n# store\n").expect("DB.md");
1064 let store = Store {
1065 root,
1066 config: Config::default(),
1067 };
1068 Fixture { _tmp: tmp, store }
1069 }
1070
1071 /// Write a content file at a store-relative path with the given type,
1072 /// summary, and body. Creates parent dirs.
1073 fn write(&self, rel: &str, type_: &str, summary: &str, body: &str) {
1074 let abs = self.store.root.join(rel);
1075 fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
1076 let contents = format!(
1077 "---\ntype: {type_}\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: {summary}\n---\n{body}\n"
1078 );
1079 fs::write(&abs, contents).expect("write file");
1080 }
1081
1082 /// Write a raw file verbatim (for frontmatter-shape edge cases).
1083 fn write_raw(&self, rel: &str, contents: &str) {
1084 let abs = self.store.root.join(rel);
1085 fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
1086 fs::write(&abs, contents).expect("write raw");
1087 }
1088
1089 /// Build the type-folder `index.jsonl` sidecars from the content written
1090 /// so far — the state a real store is always in (write-through), and the
1091 /// candidate set `backlinks` reads. Call after writing files in any test
1092 /// that exercises `backlinks` or an incoming-direction `neighborhood`.
1093 fn reindex(&self) {
1094 crate::index::Index::rebuild_all(&self.store).expect("rebuild sidecars");
1095 }
1096
1097 fn p(&self, rel: &str) -> PathBuf {
1098 PathBuf::from(rel)
1099 }
1100 }
1101
1102 fn paths(v: &[PathBuf]) -> Vec<String> {
1103 v.iter()
1104 .map(|p| p.to_string_lossy().replace('\\', "/"))
1105 .collect()
1106 }
1107
1108 // ── normalize_target ────────────────────────────────────────────────────
1109
1110 #[test]
1111 fn normalize_strips_md_and_leading_dotslash() {
1112 assert_eq!(
1113 normalize_target(Path::new("records/contacts/sarah.md")),
1114 "records/contacts/sarah"
1115 );
1116 assert_eq!(
1117 normalize_target(Path::new("./records/profiles/elena")),
1118 "records/profiles/elena"
1119 );
1120 assert_eq!(normalize_target(Path::new("/records/x")), "records/x");
1121 // Bare and `.md` forms must collapse to the same key, or edges won't unify.
1122 assert_eq!(
1123 normalize_target(Path::new("a/b")),
1124 normalize_target(Path::new("a/b.md"))
1125 );
1126 }
1127
1128 // ── extract_link_targets (forwardlinks core) ────────────────────────────
1129
1130 #[test]
1131 fn extract_handles_display_text_and_md_suffix() {
1132 let body = "See [[records/profiles/sarah-chen|Sarah]] and [[records/contacts/elena.md]].";
1133 let got = extract_link_targets(body);
1134 assert_eq!(
1135 got,
1136 vec!["records/profiles/sarah-chen", "records/contacts/elena"]
1137 );
1138 }
1139
1140 #[test]
1141 fn extract_ignores_external_markdown_links() {
1142 // Standard markdown links are NOT wiki-links and must not be extracted
1143 // (SPEC: external refs don't participate in the graph).
1144 let body = "[Acme](https://acme.io) but [[records/companies/acme]] is internal.";
1145 let got = extract_link_targets(body);
1146 assert_eq!(got, vec!["records/companies/acme"]);
1147 }
1148
1149 #[test]
1150 fn extract_display_text_is_not_treated_as_a_target() {
1151 // A `|display` segment that looks path-like must not become a target;
1152 // only the part before `|` is the link target.
1153 let body = "[[records/contacts/sarah|sources/emails/decoy]]";
1154 let got = extract_link_targets(body);
1155 assert_eq!(got, vec!["records/contacts/sarah"]);
1156 }
1157
1158 // ── rewrite_links_to (write-side twin of backlinks) ─────────────────────
1159
1160 #[test]
1161 fn rewrite_plain_link_to_canonical_new_target() {
1162 let got = rewrite_links_to(
1163 "See [[records/contacts/sarah-chen]] today.",
1164 Path::new("records/contacts/sarah-chen"),
1165 Path::new("records/contacts/sarah-chen-acme"),
1166 );
1167 assert_eq!(got, "See [[records/contacts/sarah-chen-acme]] today.");
1168 }
1169
1170 #[test]
1171 fn rewrite_preserves_display_override() {
1172 let got = rewrite_links_to(
1173 "With [[records/contacts/sarah-chen|Sarah]].",
1174 Path::new("records/contacts/sarah-chen"),
1175 Path::new("records/contacts/sarah-chen-acme"),
1176 );
1177 assert_eq!(got, "With [[records/contacts/sarah-chen-acme|Sarah]].");
1178 }
1179
1180 #[test]
1181 fn rewrite_matches_md_suffixed_old_and_emits_bare_new() {
1182 // The `.md` spelling of the old target must match (it normalizes to the
1183 // same key the read side uses), and the new target is emitted bare —
1184 // the writer doctrine validate enforces (`WIKI_LINK_HAS_EXTENSION`).
1185 let got = rewrite_links_to(
1186 "[[records/contacts/sarah-chen.md]]",
1187 Path::new("records/contacts/sarah-chen"),
1188 Path::new("records/contacts/new.md"),
1189 );
1190 assert_eq!(got, "[[records/contacts/new]]");
1191 }
1192
1193 #[test]
1194 fn rewrite_leaves_prefix_collisions_and_short_form_untouched() {
1195 // Boundary correctness, anchored to the SAME normalize_target the read
1196 // side keys on: `records/contacts/sarah-chen` must NOT match the longer
1197 // `[[…-jr]]`, the short-form `[[sarah-chen]]`, or an unrelated target.
1198 let input = "[[records/contacts/sarah-chen-jr]] [[sarah-chen]] [[records/concepts/x]]";
1199 let got = rewrite_links_to(
1200 input,
1201 Path::new("records/contacts/sarah-chen"),
1202 Path::new("records/contacts/new"),
1203 );
1204 assert_eq!(got, input, "no genuine edge to the seed → text unchanged");
1205 }
1206
1207 #[test]
1208 fn rewrite_handles_multiple_occurrences_and_mixed_spellings() {
1209 let got = rewrite_links_to(
1210 "[[records/x]] then [[./records/x]] and [[records/x.md|d]] end",
1211 Path::new("records/x"),
1212 Path::new("records/y"),
1213 );
1214 // All three spellings of the same target retarget; the display survives.
1215 assert_eq!(
1216 got,
1217 "[[records/y]] then [[records/y]] and [[records/y|d]] end"
1218 );
1219 }
1220
1221 #[test]
1222 fn rewrite_retargets_exactly_the_edges_the_core_parser_sees() {
1223 // The load-bearing property of moving the rewrite into core: the write
1224 // side must operate on EXACTLY the edge set the read side recognizes —
1225 // the same `extract_link_targets` / `normalize_target` grammar that
1226 // `forwardlinks` is built on. Anchor the test to that grammar (via
1227 // `forwardlinks` on a real file) rather than re-listing literals, so a
1228 // future divergence between the read parser and the write rewrite fails
1229 // here. (Coupled to `forwardlinks` — the single-file edge extractor —
1230 // not the multi-file `backlinks` traversal, so it tests the grammar, not
1231 // the walk.)
1232 let fx = Fixture::new();
1233 let body = "Met [[records/contacts/sarah.md|Sarah]] and not [[records/contacts/sarah-2]].";
1234 fx.write("records/profiles/bio.md", "profile", "bio", body);
1235
1236 // Read side: the parser sees two outgoing edges, both in canonical bare
1237 // form (the `.md` spelling collapsed). `sarah` is a real edge here.
1238 let edges = forwardlinks(&fx.store, &fx.p("records/profiles/bio.md")).unwrap();
1239 assert_eq!(
1240 paths(&edges),
1241 vec!["records/contacts/sarah", "records/contacts/sarah-2"],
1242 "fixture must contain exactly the two edges this test reasons about"
1243 );
1244
1245 // Write side: rewriting `sarah → sarah-chen` must retarget the edge the
1246 // parser recognized (matching the `.md` spelling), preserve the display,
1247 // and leave the unrelated `sarah-2` edge untouched.
1248 let got = rewrite_links_to(
1249 body,
1250 Path::new("records/contacts/sarah"),
1251 Path::new("records/contacts/sarah-chen"),
1252 );
1253 assert_eq!(
1254 got,
1255 "Met [[records/contacts/sarah-chen|Sarah]] and not [[records/contacts/sarah-2]]."
1256 );
1257
1258 // Cross-check through the parser: the rewritten text's edge set is the
1259 // original with `sarah` swapped for `sarah-chen` — proving the rewrite
1260 // moved exactly one edge, the one the read side keyed on.
1261 fx.write("records/profiles/bio.md", "profile", "bio", &got);
1262 let after = forwardlinks(&fx.store, &fx.p("records/profiles/bio.md")).unwrap();
1263 assert_eq!(
1264 paths(&after),
1265 vec!["records/contacts/sarah-2", "records/contacts/sarah-chen"],
1266 "after rewrite the parser must see the new target and not the old"
1267 );
1268 }
1269
1270 #[test]
1271 fn rewrite_empty_old_target_is_a_no_op() {
1272 // A degenerate `old` (normalizes to empty) must never rewrite anything,
1273 // mirroring backlinks' empty-target guard.
1274 let input = "[[records/x]] [[]] text";
1275 let got = rewrite_links_to(input, Path::new(""), Path::new("records/y"));
1276 assert_eq!(got, input);
1277 }
1278
1279 #[test]
1280 fn rewrite_no_match_returns_input_unchanged() {
1281 let input = "no links, [external](https://x), and [[records/concepts/y]]";
1282 let got = rewrite_links_to(input, Path::new("records/x"), Path::new("records/z"));
1283 assert_eq!(got, input);
1284 }
1285
1286 #[test]
1287 fn rewrite_does_not_corrupt_links_in_nested_or_long_run_fences() {
1288 // Regression for the naive `starts_with("```")/("~~~")` toggle in the
1289 // rewriter: a fenced example documenting wiki-link syntax must be copied
1290 // VERBATIM, never retargeted — matching validate's edge notion. The
1291 // standard nested-fence convention (a ````-run block wrapping a ```
1292 // example) used to flip the bool mid-block, so the example link was
1293 // rewritten (silent documentation corruption).
1294 let body = "\
1295Here is how to write a link:
1296
1297````
1298```
1299[[records/contacts/bob]]
1300```
1301still fenced [[records/contacts/bob]]
1302````
1303
1304Real link: [[records/contacts/bob]].
1305";
1306 let got = rewrite_links_to(
1307 body,
1308 Path::new("records/contacts/bob"),
1309 Path::new("records/contacts/robert"),
1310 );
1311 // The two fenced examples are untouched; only the real link retargets.
1312 let expected = "\
1313Here is how to write a link:
1314
1315````
1316```
1317[[records/contacts/bob]]
1318```
1319still fenced [[records/contacts/bob]]
1320````
1321
1322Real link: [[records/contacts/robert]].
1323";
1324 assert_eq!(
1325 got, expected,
1326 "fenced example links must survive a rename verbatim; only live edges retarget"
1327 );
1328 }
1329
1330 #[test]
1331 fn rewrite_frontmatter_fence_does_not_swallow_body_link() {
1332 // Regression for the frontmatter/body fence-boundary data-loss bug: a
1333 // stray ``` inside a YAML block scalar in frontmatter used to open a code
1334 // fence that persisted into the body, so the rewriter treated every body
1335 // `[[…]]` as fenced and skipped it — leaving a dangling link after rename
1336 // even though `backlinks`/`forwardlinks` (which reset fence state at the
1337 // frontmatter boundary) still report the body edge. The write side must
1338 // split the frontmatter off and scan the body with a FRESH fence state,
1339 // exactly like the read side, so rename and the graph reads agree.
1340 let fx = Fixture::new();
1341 let text = "\
1342---
1343type: meeting
1344created: 2026-05-27T08:00:00-07:00
1345updated: 2026-05-27T08:00:00-07:00
1346summary: Notes
1347note: |
1348 fence with no close:
1349 ```
1350---
1351Met with [[records/contacts/sarah-chen]] yesterday.
1352";
1353 fx.write_raw("records/meeting.md", text);
1354
1355 // Read side: despite the stray fence in frontmatter, the body edge is a
1356 // live forward edge (fence state resets at the frontmatter boundary).
1357 let edges = forwardlinks(&fx.store, &fx.p("records/meeting.md")).unwrap();
1358 assert_eq!(
1359 paths(&edges),
1360 vec!["records/contacts/sarah-chen"],
1361 "read side must report the body edge despite the frontmatter fence"
1362 );
1363
1364 // Write side: rename must retarget that exact body edge — not skip it as
1365 // fenced. Output is byte-exact everywhere else (frontmatter verbatim,
1366 // including the stray ```).
1367 let got = rewrite_links_to(
1368 text,
1369 Path::new("records/contacts/sarah-chen"),
1370 Path::new("records/contacts/sc2"),
1371 );
1372 let expected = "\
1373---
1374type: meeting
1375created: 2026-05-27T08:00:00-07:00
1376updated: 2026-05-27T08:00:00-07:00
1377summary: Notes
1378note: |
1379 fence with no close:
1380 ```
1381---
1382Met with [[records/contacts/sc2]] yesterday.
1383";
1384 assert_eq!(
1385 got, expected,
1386 "the body link the read side reports must be rewritten; frontmatter copied verbatim"
1387 );
1388
1389 // Cross-check through the parser: after rewrite the read side sees the new
1390 // target and no trace of the old — rename and the graph reads agree.
1391 fx.write_raw("records/meeting.md", &got);
1392 let after = forwardlinks(&fx.store, &fx.p("records/meeting.md")).unwrap();
1393 assert_eq!(
1394 paths(&after),
1395 vec!["records/contacts/sc2"],
1396 "after rename the read side must report only the retargeted edge"
1397 );
1398 }
1399
1400 #[test]
1401 fn rewrite_link_genuinely_inside_a_body_fence_is_left_untouched() {
1402 // The boundary reset must not over-correct: a `[[…]]` truly inside a BODY
1403 // code fence is a documentation example, NOT an edge (matching the read
1404 // side), and must survive rename verbatim. This pairs with the
1405 // frontmatter-fence test: the body still gets a fresh, real fence state.
1406 let fx = Fixture::new();
1407 let text = "\
1408---
1409type: meeting
1410created: 2026-05-27T08:00:00-07:00
1411updated: 2026-05-27T08:00:00-07:00
1412summary: Notes
1413---
1414Real link: [[records/contacts/sarah-chen]].
1415
1416```
1417Example: [[records/contacts/sarah-chen]]
1418```
1419";
1420 fx.write_raw("records/meeting.md", text);
1421
1422 // Read side: only the unfenced body link is an edge; the fenced one is not.
1423 let edges = forwardlinks(&fx.store, &fx.p("records/meeting.md")).unwrap();
1424 assert_eq!(
1425 paths(&edges),
1426 vec!["records/contacts/sarah-chen"],
1427 "only the unfenced body link is a live edge"
1428 );
1429
1430 // Write side: the real link retargets; the fenced example is byte-exact.
1431 let got = rewrite_links_to(
1432 text,
1433 Path::new("records/contacts/sarah-chen"),
1434 Path::new("records/contacts/sc2"),
1435 );
1436 let expected = "\
1437---
1438type: meeting
1439created: 2026-05-27T08:00:00-07:00
1440updated: 2026-05-27T08:00:00-07:00
1441summary: Notes
1442---
1443Real link: [[records/contacts/sc2]].
1444
1445```
1446Example: [[records/contacts/sarah-chen]]
1447```
1448";
1449 assert_eq!(
1450 got, expected,
1451 "a link inside a body fence must survive rename; only the live edge retargets"
1452 );
1453 }
1454
1455 // ── forwardlinks ─────────────────────────────────────────────────────────
1456
1457 #[test]
1458 fn forwardlinks_returns_sorted_deduped_targets_excluding_self() {
1459 let fx = Fixture::new();
1460 fx.write(
1461 "records/projects/renewal.md",
1462 "synthesis",
1463 "Renewal project",
1464 "Links: [[records/contacts/sarah]] [[records/companies/acme]] [[records/contacts/sarah]] and itself [[records/projects/renewal]].",
1465 );
1466 // The targets need not exist on disk for forwardlinks (it reads the one
1467 // file only). Self-links are dropped; duplicates collapse; sorted asc.
1468 let got = forwardlinks(&fx.store, &fx.p("records/projects/renewal.md")).unwrap();
1469 assert_eq!(
1470 paths(&got),
1471 vec!["records/companies/acme", "records/contacts/sarah"]
1472 );
1473 }
1474
1475 #[test]
1476 fn forwardlinks_picks_up_wiki_links_in_frontmatter() {
1477 // SPEC: wiki-links appear in scalar + block-sequence frontmatter fields,
1478 // not just the body. forwardlinks must follow those edges too.
1479 let fx = Fixture::new();
1480 fx.write_raw(
1481 "records/meetings/m1.md",
1482 "---\ntype: meeting\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Renewal sync\ncompany: [[records/companies/acme]]\nattendees:\n - [[records/contacts/sarah]]\n - [[records/contacts/elena]]\n---\nNotes about [[records/projects/renewal]].\n",
1483 );
1484 let got = forwardlinks(&fx.store, &fx.p("records/meetings/m1.md")).unwrap();
1485 assert_eq!(
1486 paths(&got),
1487 vec![
1488 "records/companies/acme",
1489 "records/contacts/elena",
1490 "records/contacts/sarah",
1491 "records/projects/renewal",
1492 ]
1493 );
1494 }
1495
1496 #[test]
1497 fn forwardlinks_missing_file_is_empty_not_error() {
1498 let fx = Fixture::new();
1499 let got = forwardlinks(&fx.store, &fx.p("records/profiles/ghost.md")).unwrap();
1500 assert!(got.is_empty());
1501 }
1502
1503 #[test]
1504 fn forwardlinks_resolves_seed_given_without_md_extension() {
1505 let fx = Fixture::new();
1506 fx.write(
1507 "records/profiles/sarah.md",
1508 "profile",
1509 "Sarah bio",
1510 "Works at [[records/companies/acme]].",
1511 );
1512 // Seed passed in bare wiki-link form (no `.md`) must still resolve.
1513 let got = forwardlinks(&fx.store, &fx.p("records/profiles/sarah")).unwrap();
1514 assert_eq!(paths(&got), vec!["records/companies/acme"]);
1515 }
1516
1517 // ── backlinks ──────────────────────────────────────────────────────────
1518
1519 #[test]
1520 fn backlinks_finds_incoming_across_layers_and_link_forms() {
1521 let fx = Fixture::new();
1522 // Target.
1523 fx.write("records/contacts/sarah.md", "contact", "Sarah Chen", "");
1524 // Three different incoming-link spellings, all to the same target.
1525 fx.write(
1526 "records/profiles/sarah.md",
1527 "profile",
1528 "bio",
1529 "See [[records/contacts/sarah]].",
1530 );
1531 fx.write(
1532 "records/meetings/m1.md",
1533 "meeting",
1534 "Renewal call",
1535 "Attendee [[records/contacts/sarah|Sarah]].",
1536 );
1537 fx.write(
1538 "sources/emails/e1.md",
1539 "email",
1540 "Hi",
1541 "From [[records/contacts/sarah.md]] today.",
1542 );
1543 // A file that links to a DIFFERENT contact must not be a backlink.
1544 fx.write(
1545 "records/profiles/other.md",
1546 "profile",
1547 "x",
1548 "[[records/contacts/sarah-2]]",
1549 );
1550 fx.reindex();
1551
1552 // All three link forms ([[x]], [[x|d]], [[x.md]]) resolve to the same
1553 // target and are found; the linkers are returned in canonical bare form.
1554 let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1555 assert_eq!(
1556 paths(&got),
1557 vec![
1558 "records/meetings/m1",
1559 "records/profiles/sarah",
1560 "sources/emails/e1",
1561 ]
1562 );
1563 }
1564
1565 #[test]
1566 fn backlinks_and_forwardlinks_round_trip_on_same_key() {
1567 // If A forwardlinks to B, then B backlinks to A — both expressed in the
1568 // identical bare key, so neighborhood can dedup across directions.
1569 let fx = Fixture::new();
1570 fx.write(
1571 "records/profiles/a.md",
1572 "profile",
1573 "A",
1574 "Knows [[records/profiles/b]].",
1575 );
1576 fx.write("records/profiles/b.md", "profile", "B", "");
1577 fx.reindex();
1578 let fwd = forwardlinks(&fx.store, &fx.p("records/profiles/a.md")).unwrap();
1579 let back = backlinks(&fx.store, &fx.p("records/profiles/b.md")).unwrap();
1580 assert_eq!(paths(&fwd), vec!["records/profiles/b"]);
1581 assert_eq!(paths(&back), vec!["records/profiles/a"]);
1582 }
1583
1584 #[test]
1585 fn backlinks_does_not_match_path_prefix_collisions() {
1586 let fx = Fixture::new();
1587 fx.write("records/contacts/sam.md", "contact", "Sam", "");
1588 // `sam-smith` shares the `sam` prefix; must NOT count as a backlink to `sam`.
1589 fx.write(
1590 "records/profiles/x.md",
1591 "profile",
1592 "x",
1593 "[[records/contacts/sam-smith]]",
1594 );
1595 // The genuine backlink.
1596 fx.write(
1597 "records/profiles/y.md",
1598 "profile",
1599 "y",
1600 "[[records/contacts/sam]]",
1601 );
1602 fx.reindex();
1603
1604 let got = backlinks(&fx.store, &fx.p("records/contacts/sam")).unwrap();
1605 assert_eq!(paths(&got), vec!["records/profiles/y"]);
1606 }
1607
1608 #[test]
1609 fn backlinks_excludes_self_reference() {
1610 let fx = Fixture::new();
1611 // A page that links to itself is not its own backlink.
1612 fx.write(
1613 "records/synthesis/overview.md",
1614 "synthesis",
1615 "Overview",
1616 "This page [[records/synthesis/overview]] references itself.",
1617 );
1618 fx.reindex();
1619 let got = backlinks(&fx.store, &fx.p("records/synthesis/overview.md")).unwrap();
1620 assert!(
1621 got.is_empty(),
1622 "self-link must not appear as a backlink, got {got:?}"
1623 );
1624 }
1625
1626 #[test]
1627 fn backlinks_empty_when_nobody_links() {
1628 let fx = Fixture::new();
1629 fx.write("records/contacts/lonely.md", "contact", "Lonely", "");
1630 fx.write(
1631 "records/profiles/unrelated.md",
1632 "profile",
1633 "x",
1634 "[[records/companies/acme]]",
1635 );
1636 fx.reindex();
1637 let got = backlinks(&fx.store, &fx.p("records/contacts/lonely.md")).unwrap();
1638 assert!(got.is_empty());
1639 }
1640
1641 #[test]
1642 fn backlinks_ignores_index_and_meta_files() {
1643 let fx = Fixture::new();
1644 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1645 // An index.md that lists the target must NOT be reported as a backlink
1646 // (indexes are catalog, not relationship edges).
1647 fx.write_raw(
1648 "records/contacts/index.md",
1649 "---\ntype: index\nscope: folder\nfolder: records/contacts\n---\n- [[records/contacts/sarah]] — Sarah\n",
1650 );
1651 fx.reindex();
1652 let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1653 assert!(got.is_empty(), "index.md must be excluded, got {got:?}");
1654 }
1655
1656 #[test]
1657 fn backlinks_finds_body_only_edge_not_in_frontmatter_links_field() {
1658 // REGRESSION: the sidecar's `links` field carries only the file's
1659 // frontmatter `links:` list; it does NOT include wiki-links written in
1660 // the body or in other typed frontmatter fields. Answering backlinks
1661 // from `links[]` alone would silently miss this edge. The candidate set
1662 // is sidecar-bounded, but each candidate's edge is confirmed by parsing
1663 // the file (the same extraction forwardlinks uses), so a body-only link
1664 // must still register as a backlink.
1665 let fx = Fixture::new();
1666 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1667 // `meeting.md` links to sarah ONLY in its body — its frontmatter has no
1668 // `links:` field at all, so the sidecar record's `links` is empty.
1669 fx.write(
1670 "records/meetings/standup.md",
1671 "meeting",
1672 "Standup",
1673 "Discussed renewal with [[records/contacts/sarah]].",
1674 );
1675 fx.reindex();
1676
1677 // Guard the premise: the sidecar record really does carry an empty
1678 // `links` (so this test fails loudly if the index ever starts extracting
1679 // body links — at which point the backlink predicate could be revisited).
1680 let rec = fx
1681 .store
1682 .find_by_type("meeting")
1683 .unwrap()
1684 .into_iter()
1685 .find(|r| r.path == fx.p("records/meetings/standup.md"))
1686 .expect("meeting is catalogued in its sidecar");
1687 assert!(
1688 rec.links.is_empty(),
1689 "premise: the body link is NOT projected into the sidecar `links` field; got {:?}",
1690 rec.links
1691 );
1692
1693 // Yet backlinks still finds it — because it confirms via the file parse,
1694 // not via the sidecar `links` field.
1695 let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1696 assert_eq!(
1697 paths(&got),
1698 vec!["records/meetings/standup"],
1699 "a body-only wiki-link must register as a backlink"
1700 );
1701 }
1702
1703 #[test]
1704 fn backlinks_finds_edge_in_typed_frontmatter_field() {
1705 // A wiki-link inside a *typed* frontmatter field (`company:`) is a real
1706 // edge forwardlinks follows, so backlinks must find it too — even though
1707 // the sidecar's `links` field (the `links:` key only) does not list it.
1708 let fx = Fixture::new();
1709 fx.write("records/companies/acme.md", "company", "Acme", "");
1710 fx.write_raw(
1711 "records/contacts/sarah.md",
1712 "---\ntype: contact\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Sarah\ncompany: [[records/companies/acme]]\n---\nBody with no links.\n",
1713 );
1714 fx.reindex();
1715 let got = backlinks(&fx.store, &fx.p("records/companies/acme.md")).unwrap();
1716 assert_eq!(
1717 paths(&got),
1718 vec!["records/contacts/sarah"],
1719 "a wiki-link in a typed frontmatter field is an incoming edge"
1720 );
1721 }
1722
1723 #[test]
1724 fn backlinks_unscoped_scans_the_tree_not_only_the_sidecar() {
1725 // REGRESSION (loop budget): an UNSCOPED `backlinks` must resolve incoming
1726 // edges with a SINGLE embedded-ripgrep pass over the tree
1727 // (`Store::find_links_to`), NOT by reading the sidecar candidate set and
1728 // then `read_to_string`-confirming each candidate (which re-opens every
1729 // content file → O(store); the documented >3x budget miss). A ripgrep
1730 // pass is the same scan engine `validate`/`rename`/`dbmd links` ride, and
1731 // the tree — not the sidecar — is its ground truth: a linker that is on
1732 // disk but absent from every sidecar (stale / never-built index) is still
1733 // found. We assert that behaviorally, which fails loudly if the unscoped
1734 // path ever reverts to the sidecar-bounded per-candidate confirm loop
1735 // (that loop would NOT find the unindexed linker).
1736 let fx = Fixture::new();
1737 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1738 fx.write(
1739 "records/profiles/indexed.md",
1740 "profile",
1741 "Indexed",
1742 "[[records/contacts/sarah]]",
1743 );
1744 fx.reindex(); // builds sidecars for sarah + the indexed linker
1745
1746 // Now drop a NEW linker on disk WITHOUT reindexing — it is on disk but in
1747 // no sidecar.
1748 fx.write(
1749 "records/profiles/unindexed.md",
1750 "profile",
1751 "Unindexed",
1752 "[[records/contacts/sarah]]",
1753 );
1754
1755 let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1756 assert_eq!(
1757 paths(&got),
1758 vec!["records/profiles/indexed", "records/profiles/unindexed"],
1759 "unscoped backlinks ripgrep-scans the tree, so the on-disk-but-unindexed \
1760 linker is found too — not only the sidecar-catalogued one"
1761 );
1762 }
1763
1764 #[test]
1765 fn backlinks_scoped_candidates_come_from_the_sidecar_not_a_tree_walk() {
1766 // REGRESSION (scale contract): the SCOPED form (`--type` / `--in`) is the
1767 // I/O-scoped path — it enumerates candidates from the relevant type-folder
1768 // `index.jsonl` sidecars and parses only those, NOT a whole-tree walk.
1769 // That is what makes the scope an I/O scope, not just a result filter:
1770 // a linker that is on disk but ABSENT from the sidecar (stale / never-built
1771 // index) is NOT discovered by the scoped call (the sidecar bounds which
1772 // files are candidates). This is the loop-vs-walk distinction the SPEC
1773 // draws, and it is exactly the inverse of the unscoped tree scan above.
1774 let fx = Fixture::new();
1775 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1776 fx.write(
1777 "records/profiles/indexed.md",
1778 "profile",
1779 "Indexed",
1780 "[[records/contacts/sarah]]",
1781 );
1782 fx.reindex(); // builds sidecars for sarah + the indexed linker
1783
1784 // Drop a NEW profile linker on disk WITHOUT reindexing — on disk, in no
1785 // sidecar.
1786 fx.write(
1787 "records/profiles/unindexed.md",
1788 "profile",
1789 "Unindexed",
1790 "[[records/contacts/sarah]]",
1791 );
1792
1793 // Scoped to the `profile` type: the candidate set is the sidecar's, so
1794 // only the catalogued linker is found — the unindexed one is invisible.
1795 let only_profiles = vec!["profile".to_string()];
1796 let got = backlinks_filtered(
1797 &fx.store,
1798 &fx.p("records/contacts/sarah.md"),
1799 &only_profiles,
1800 None,
1801 )
1802 .unwrap();
1803 assert_eq!(
1804 paths(&got),
1805 vec!["records/profiles/indexed"],
1806 "scoped backlinks reads the sidecar candidate set; the on-disk-but-unindexed \
1807 linker is not tree-walked"
1808 );
1809 }
1810
1811 #[test]
1812 fn backlinks_filtered_type_scopes_the_candidate_set() {
1813 // `--type` narrows backlinks to linkers of that type. Two files link to
1814 // the target — one `meeting`, one `profile`; filtering to `meeting`
1815 // returns only the meeting.
1816 let fx = Fixture::new();
1817 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1818 fx.write(
1819 "records/meetings/m1.md",
1820 "meeting",
1821 "Call",
1822 "[[records/contacts/sarah]]",
1823 );
1824 fx.write(
1825 "records/profiles/bio.md",
1826 "profile",
1827 "Bio",
1828 "[[records/contacts/sarah]]",
1829 );
1830 fx.reindex();
1831
1832 let only_meetings = vec!["meeting".to_string()];
1833 let got = backlinks_filtered(
1834 &fx.store,
1835 &fx.p("records/contacts/sarah.md"),
1836 &only_meetings,
1837 None,
1838 )
1839 .unwrap();
1840 assert_eq!(
1841 paths(&got),
1842 vec!["records/meetings/m1"],
1843 "--type meeting must exclude the profile linker"
1844 );
1845
1846 // Unfiltered, both come back — proving the filter (not the data) dropped one.
1847 let all = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1848 assert_eq!(
1849 paths(&all),
1850 vec!["records/meetings/m1", "records/profiles/bio"]
1851 );
1852 }
1853
1854 #[test]
1855 fn backlinks_filtered_layer_scopes_the_candidate_set() {
1856 // `--in <layer>` narrows backlinks to linkers under that layer. The two
1857 // linkers live in different layers (a sources email and a records
1858 // meeting) so the scope genuinely separates them.
1859 let fx = Fixture::new();
1860 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1861 fx.write(
1862 "records/meetings/m1.md",
1863 "meeting",
1864 "Call",
1865 "[[records/contacts/sarah]]",
1866 );
1867 fx.write(
1868 "sources/emails/intro.md",
1869 "email",
1870 "Intro",
1871 "[[records/contacts/sarah]]",
1872 );
1873 fx.reindex();
1874
1875 let got = backlinks_filtered(
1876 &fx.store,
1877 &fx.p("records/contacts/sarah.md"),
1878 &[],
1879 Some(Layer::Sources),
1880 )
1881 .unwrap();
1882 assert_eq!(
1883 paths(&got),
1884 vec!["sources/emails/intro"],
1885 "--in sources must keep only the sources-layer linker"
1886 );
1887
1888 let records_only = backlinks_filtered(
1889 &fx.store,
1890 &fx.p("records/contacts/sarah.md"),
1891 &[],
1892 Some(Layer::Records),
1893 )
1894 .unwrap();
1895 assert_eq!(paths(&records_only), vec!["records/meetings/m1"]);
1896 }
1897
1898 #[test]
1899 fn backlinks_scoped_type_spans_all_topic_folders_in_its_layer() {
1900 // REGRESSION (finding #12): a `type` can legitimately span several folders
1901 // within one layer — a `profile` is filed under its canonical
1902 // `records/profiles/` folder, but an agent may also file a profile under
1903 // another `records/<folder>/` (the type, not the folder, is authoritative).
1904 // The scoped candidate set must read the whole `records/` layer and filter
1905 // by type, NOT just the canonical-guess folder `records/profiles/`. Before
1906 // the fix, `find_by_type("profile")` read ONLY `records/profiles/index.jsonl`
1907 // whenever that sidecar existed, silently dropping every profile linker
1908 // filed under any other folder — so `backlinks --type profile` under-reported
1909 // dependents (a wrong blast-radius check) the moment a `records/profiles/`
1910 // page also existed.
1911 //
1912 // The trigger needs BOTH: a populated `records/profiles/` (so its canonical
1913 // sidecar exists) AND a profile elsewhere in the layer that links the
1914 // target. The earlier
1915 // `backlinks_scoped_candidates_come_from_the_sidecar_not_a_tree_walk` test
1916 // masks this bug precisely because its fixture has no `records/profiles/`.
1917 let fx = Fixture::new();
1918 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1919 // A profile in the CANONICAL type folder, NOT linking the target — its
1920 // only purpose is to make `records/profiles/index.jsonl` exist on disk.
1921 fx.write(
1922 "records/profiles/glossary.md",
1923 "profile",
1924 "Glossary",
1925 "No link to sarah here.",
1926 );
1927 // A profile in a NON-canonical folder that DOES link the target.
1928 fx.write(
1929 "records/people/sarah.md",
1930 "profile",
1931 "Sarah bio",
1932 "Profile of [[records/contacts/sarah]].",
1933 );
1934 fx.reindex(); // builds records/profiles/index.jsonl AND records/people/index.jsonl
1935
1936 // Scoped to `profile`: the off-canonical linker MUST be found. Pre-fix,
1937 // the candidate set was only `records/profiles/`'s sidecar, so this was empty.
1938 let scoped = backlinks_filtered(
1939 &fx.store,
1940 &fx.p("records/contacts/sarah.md"),
1941 &["profile".to_string()],
1942 None,
1943 )
1944 .unwrap();
1945 assert_eq!(
1946 paths(&scoped),
1947 vec!["records/people/sarah"],
1948 "a profile filed outside records/profiles/ must still be a scoped backlink"
1949 );
1950
1951 // Cross-check: the unscoped path (ripgrep tree scan) finds the same single
1952 // linker, proving the scoped result is now complete — not over- or
1953 // under-counting — and that the data was real all along.
1954 let unscoped = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1955 assert_eq!(
1956 paths(&unscoped),
1957 vec!["records/people/sarah"],
1958 "scoped and unscoped backlinks must agree on the edge set"
1959 );
1960 }
1961
1962 #[test]
1963 fn backlinks_scoped_type_finds_loose_file_at_non_canonical_layer() {
1964 // REGRESSION (spec-conformance, SPEC § Loose files): a loose file (content
1965 // directly at a layer root, no type-folder) may be filed at a layer that is
1966 // NOT the type's canonical layer — e.g. a `note` (canonical layer
1967 // `sources/`) filed as `records/loose-note.md` and catalogued in
1968 // `records/index.jsonl`. A scoped `backlinks --type note` must still find
1969 // it, matching the unscoped scan and `dbmd query --type note`.
1970 //
1971 // Pre-fix, `candidate_records(--type note)` read only `layer_for_type(note)`
1972 // = Sources, so the records-loose note was invisible (`--type note` empty),
1973 // and `--type note --in records` hit the early `continue` (records ≠ the
1974 // note's canonical Sources layer) → also empty. Both diverged from the
1975 // store-wide unscoped scan. The fix reads store-wide (or the named layer)
1976 // sidecars and filters by `type`, never short-circuiting on the canonical
1977 // layer.
1978 let fx = Fixture::new();
1979 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1980 // A loose `note` directly at the records/ layer root (no type-folder),
1981 // linking the target. Its canonical layer is sources/, so this exercises
1982 // exactly the off-canonical-layer loose-file path.
1983 fx.write_raw(
1984 "records/loose-note.md",
1985 "---\ntype: note\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Loose\n---\nMentions [[records/contacts/sarah]].\n",
1986 );
1987 fx.reindex(); // catalogs the loose note in records/index.jsonl
1988
1989 let target = fx.p("records/contacts/sarah.md");
1990 let note_type = vec!["note".to_string()];
1991
1992 // Unscoped: the loose note is a backlink (ground truth).
1993 let unscoped = backlinks(&fx.store, &target).unwrap();
1994 assert_eq!(
1995 paths(&unscoped),
1996 vec!["records/loose-note"],
1997 "unscoped backlinks finds the records-loose note"
1998 );
1999
2000 // `--type note` (no layer): must agree with unscoped, NOT empty.
2001 let by_type = backlinks_filtered(&fx.store, &target, ¬e_type, None).unwrap();
2002 assert_eq!(
2003 paths(&by_type),
2004 vec!["records/loose-note"],
2005 "`--type note` must find the loose note filed at the non-canonical (records) layer"
2006 );
2007
2008 // `--type note --in records`: the note lives in records/, so this must
2009 // find it too — the early `continue` on canonical-layer mismatch is gone.
2010 let by_type_in_records =
2011 backlinks_filtered(&fx.store, &target, ¬e_type, Some(Layer::Records)).unwrap();
2012 assert_eq!(
2013 paths(&by_type_in_records),
2014 vec!["records/loose-note"],
2015 "`--type note --in records` must find the records-loose note"
2016 );
2017
2018 // Cross-check the same completeness via the structured query path the SPEC
2019 // ties graph reads to: `query --type note` (store-wide) sees the loose note,
2020 // proving the data was real and the scoped graph result now agrees with it.
2021 let q_records: Vec<String> = paths(
2022 &crate::query::Query::new()
2023 .with_type("note")
2024 .execute(&fx.store)
2025 .unwrap()
2026 .into_iter()
2027 .map(|r| r.path)
2028 .collect::<Vec<_>>(),
2029 );
2030 assert_eq!(
2031 q_records,
2032 vec!["records/loose-note.md"],
2033 "query --type note sees the loose note store-wide; scoped backlinks must agree"
2034 );
2035 }
2036
2037 // ── neighborhood ─────────────────────────────────────────────────────────
2038
2039 #[test]
2040 fn neighborhood_hops_zero_is_empty() {
2041 let fx = Fixture::new();
2042 fx.write(
2043 "records/profiles/a.md",
2044 "profile",
2045 "A",
2046 "[[records/profiles/b]]",
2047 );
2048 fx.write("records/profiles/b.md", "profile", "B", "");
2049 let slice = neighborhood(
2050 &fx.store,
2051 &fx.p("records/profiles/a.md"),
2052 0,
2053 &[],
2054 Direction::Both,
2055 )
2056 .unwrap();
2057 assert_eq!(slice.seed, fx.p("records/profiles/a"));
2058 assert!(slice.nodes.is_empty());
2059 }
2060
2061 #[test]
2062 fn neighborhood_outgoing_one_hop_reads_summary_and_type() {
2063 let fx = Fixture::new();
2064 fx.write(
2065 "records/profiles/a.md",
2066 "profile",
2067 "Person A",
2068 "Knows [[records/contacts/b]].",
2069 );
2070 fx.write("records/contacts/b.md", "contact", "Contact B summary", "");
2071 let slice = neighborhood(
2072 &fx.store,
2073 &fx.p("records/profiles/a.md"),
2074 1,
2075 &[],
2076 Direction::Outgoing,
2077 )
2078 .unwrap();
2079 assert_eq!(slice.nodes.len(), 1);
2080 let n = &slice.nodes[0];
2081 assert_eq!(n.path, fx.p("records/contacts/b"));
2082 assert_eq!(n.summary, "Contact B summary");
2083 assert_eq!(n.type_.as_deref(), Some("contact"));
2084 assert_eq!(n.hops, 1);
2085 assert_eq!(
2086 n.via,
2087 Some((fx.p("records/profiles/a"), Direction::Outgoing))
2088 );
2089 }
2090
2091 #[test]
2092 fn neighborhood_incoming_only_walks_backlinks() {
2093 let fx = Fixture::new();
2094 // a -> seed (incoming to seed). seed -> c (outgoing from seed).
2095 fx.write(
2096 "records/profiles/seed.md",
2097 "profile",
2098 "Seed",
2099 "Out to [[records/profiles/c]].",
2100 );
2101 fx.write(
2102 "records/profiles/a.md",
2103 "profile",
2104 "A",
2105 "In to [[records/profiles/seed]].",
2106 );
2107 fx.write("records/profiles/c.md", "profile", "C", "");
2108 fx.reindex();
2109 let slice = neighborhood(
2110 &fx.store,
2111 &fx.p("records/profiles/seed.md"),
2112 1,
2113 &[],
2114 Direction::Incoming,
2115 )
2116 .unwrap();
2117 // Incoming direction: only `a` (which links TO seed), not `c`.
2118 assert_eq!(
2119 paths(
2120 &slice
2121 .nodes
2122 .iter()
2123 .map(|n| n.path.clone())
2124 .collect::<Vec<_>>()
2125 ),
2126 vec!["records/profiles/a"]
2127 );
2128 assert_eq!(
2129 slice.nodes[0].via,
2130 Some((fx.p("records/profiles/seed"), Direction::Incoming))
2131 );
2132 }
2133
2134 #[test]
2135 fn neighborhood_bounded_bfs_respects_hop_limit_and_min_distance() {
2136 let fx = Fixture::new();
2137 // Chain a -> b -> c -> d, all outgoing.
2138 fx.write("records/c/a.md", "concept", "A", "[[records/c/b]]");
2139 fx.write("records/c/b.md", "concept", "B", "[[records/c/c]]");
2140 fx.write("records/c/c.md", "concept", "C", "[[records/c/d]]");
2141 fx.write("records/c/d.md", "concept", "D", "");
2142 let slice = neighborhood(
2143 &fx.store,
2144 &fx.p("records/c/a.md"),
2145 2,
2146 &[],
2147 Direction::Outgoing,
2148 )
2149 .unwrap();
2150 // 2 hops reaches b (1) and c (2), not d (3).
2151 let by_path: HashMap<String, u32> = slice
2152 .nodes
2153 .iter()
2154 .map(|n| (n.path.to_string_lossy().to_string(), n.hops))
2155 .collect();
2156 assert_eq!(by_path.get("records/c/b").copied(), Some(1));
2157 assert_eq!(by_path.get("records/c/c").copied(), Some(2));
2158 assert_eq!(by_path.get("records/c/d"), None);
2159 assert_eq!(slice.nodes.len(), 2);
2160 }
2161
2162 #[test]
2163 fn neighborhood_records_min_hops_on_diamond() {
2164 let fx = Fixture::new();
2165 // Diamond: a -> b, a -> c, b -> d, c -> d. d is reachable at hop 2 from
2166 // either branch; it must be recorded once, at hop 2.
2167 fx.write(
2168 "records/d/a.md",
2169 "concept",
2170 "A",
2171 "[[records/d/b]] [[records/d/c]]",
2172 );
2173 fx.write("records/d/b.md", "concept", "B", "[[records/d/d]]");
2174 fx.write("records/d/c.md", "concept", "C", "[[records/d/d]]");
2175 fx.write("records/d/d.md", "concept", "D", "");
2176 let slice = neighborhood(
2177 &fx.store,
2178 &fx.p("records/d/a.md"),
2179 3,
2180 &[],
2181 Direction::Outgoing,
2182 )
2183 .unwrap();
2184 let d_nodes: Vec<&ContextNode> = slice
2185 .nodes
2186 .iter()
2187 .filter(|n| n.path == fx.p("records/d/d"))
2188 .collect();
2189 assert_eq!(d_nodes.len(), 1, "d must appear exactly once");
2190 assert_eq!(d_nodes[0].hops, 2, "d's min distance from a is 2");
2191 // b and c at hop 1, d at hop 2 => 3 nodes total, no cycle blowup.
2192 assert_eq!(slice.nodes.len(), 3);
2193 }
2194
2195 #[test]
2196 fn neighborhood_type_filter_narrows_results_but_not_traversal() {
2197 let fx = Fixture::new();
2198 // seed -> contact -> meeting. Filtering to `meeting` must still reach
2199 // the meeting THROUGH the (excluded) contact at hop 2.
2200 fx.write(
2201 "records/profiles/seed.md",
2202 "profile",
2203 "Seed",
2204 "[[records/contacts/sarah]]",
2205 );
2206 fx.write(
2207 "records/contacts/sarah.md",
2208 "contact",
2209 "Sarah",
2210 "[[records/meetings/m1]]",
2211 );
2212 fx.write("records/meetings/m1.md", "meeting", "Renewal call", "");
2213 let only_meetings = vec!["meeting".to_string()];
2214 let slice = neighborhood(
2215 &fx.store,
2216 &fx.p("records/profiles/seed.md"),
2217 2,
2218 &only_meetings,
2219 Direction::Outgoing,
2220 )
2221 .unwrap();
2222 // Only the meeting is returned; the contact is traversed but filtered out.
2223 assert_eq!(slice.nodes.len(), 1);
2224 assert_eq!(slice.nodes[0].path, fx.p("records/meetings/m1"));
2225 assert_eq!(slice.nodes[0].type_.as_deref(), Some("meeting"));
2226 assert_eq!(slice.nodes[0].hops, 2);
2227 }
2228
2229 #[test]
2230 fn neighborhood_capped_bounds_traversal_not_just_output() {
2231 // REGRESSION (finding #16): `neighborhood` expands every reached node, and
2232 // each incoming-edge expansion is a full-store scan, so the per-node cost
2233 // is O(visited × store). The CLI's `--limit` was applied post-hoc as a
2234 // `.take(n)` on the RESULT, which caps printed nodes but NOT the traversal
2235 // — the scans still fire for every reachable node. `neighborhood_capped`
2236 // bounds the traversal itself: once `max_nodes` distinct nodes are
2237 // admitted, the BFS stops discovering (and therefore stops scanning).
2238 //
2239 // Structure proving traversal — not just output — is bounded:
2240 // seed -> a, b, c (hop 1, discovered in sorted order: a, b, c)
2241 // a -> deep (hop 2, reachable ONLY by expanding `a`)
2242 // Cap at 2: admit `a` and `b`, stop before `c` and before any hop-2
2243 // expansion. `deep` is therefore unreachable. A post-hoc `.take(2)` would
2244 // have traversed the whole graph (reaching `deep`) and only then truncated
2245 // — so the absence of `deep` is observable proof the traversal stopped.
2246 let fx = Fixture::new();
2247 fx.write(
2248 "records/n/seed.md",
2249 "concept",
2250 "Seed",
2251 "[[records/n/a]] [[records/n/b]] [[records/n/c]]",
2252 );
2253 fx.write("records/n/a.md", "concept", "A", "[[records/n/deep]]");
2254 fx.write("records/n/b.md", "concept", "B", "");
2255 fx.write("records/n/c.md", "concept", "C", "");
2256 fx.write("records/n/deep.md", "concept", "Deep", "");
2257
2258 // Uncapped over 3 hops: all four reachable nodes appear (a, b, c at hop 1,
2259 // deep at hop 2) — the full set the cap is measured against.
2260 let full = neighborhood(
2261 &fx.store,
2262 &fx.p("records/n/seed.md"),
2263 3,
2264 &[],
2265 Direction::Outgoing,
2266 )
2267 .unwrap();
2268 assert_eq!(
2269 paths(
2270 &full
2271 .nodes
2272 .iter()
2273 .map(|n| n.path.clone())
2274 .collect::<Vec<_>>()
2275 ),
2276 vec![
2277 "records/n/a",
2278 "records/n/b",
2279 "records/n/c",
2280 "records/n/deep"
2281 ],
2282 "uncapped traversal reaches every node within the hop budget"
2283 );
2284
2285 // Capped at 2 over the SAME hop budget: exactly the first two hop-1 nodes,
2286 // and crucially NOT `deep` — the cap halted the BFS before any node was
2287 // expanded into hop 2, so the deep node was never traversed to.
2288 let capped = neighborhood_capped(
2289 &fx.store,
2290 &fx.p("records/n/seed.md"),
2291 3,
2292 &[],
2293 Direction::Outgoing,
2294 Some(2),
2295 )
2296 .unwrap();
2297 assert_eq!(
2298 paths(
2299 &capped
2300 .nodes
2301 .iter()
2302 .map(|n| n.path.clone())
2303 .collect::<Vec<_>>()
2304 ),
2305 vec!["records/n/a", "records/n/b"],
2306 "the cap bounds traversal: only the first 2 nodes are reached, and the \
2307 hop-2 `deep` node (reachable only by expanding a capped-out node) is \
2308 never traversed"
2309 );
2310
2311 // `max_nodes = None` is exactly the unbounded `neighborhood` behavior.
2312 let uncapped = neighborhood_capped(
2313 &fx.store,
2314 &fx.p("records/n/seed.md"),
2315 3,
2316 &[],
2317 Direction::Outgoing,
2318 None,
2319 )
2320 .unwrap();
2321 assert_eq!(
2322 uncapped.nodes.len(),
2323 full.nodes.len(),
2324 "None cap matches the unbounded neighborhood result"
2325 );
2326 }
2327
#[test]
fn neighborhood_capped_both_direction_caps_the_node_count() {
    // `Direction::Both` is what the CLI always passes, and its per-node
    // backlinks scan is the expensive path the cap exists to bound. The cap
    // gates discovery in every direction, so a hub that many pages link to
    // must still come back bounded.
    let fx = Fixture::new();
    fx.write("records/profiles/hub.md", "profile", "Hub", "");
    for leaf in ["a", "b", "c", "d", "e"] {
        let rel = format!("records/profiles/{leaf}.md");
        fx.write(&rel, "profile", leaf, "[[records/profiles/hub]]");
    }
    fx.reindex();

    let hub = fx.p("records/profiles/hub.md");

    // Five pages link to the hub, but only three may be returned.
    let capped =
        neighborhood_capped(&fx.store, &hub, 1, &[], Direction::Both, Some(3)).unwrap();
    assert_eq!(
        capped.nodes.len(),
        3,
        "Both-direction neighborhood is bounded to the node cap"
    );

    // Control: the identical query without a cap yields all five linkers, so
    // it was the cap (not the fixture data) that limited the set above.
    let uncapped = neighborhood(&fx.store, &hub, 1, &[], Direction::Both).unwrap();
    assert_eq!(uncapped.nodes.len(), 5);
}
2372
#[test]
fn neighborhood_cycle_terminates() {
    // Two pages linking to each other form an a <-> b cycle. The traversal
    // must finish, and each page may appear at most once.
    let fx = Fixture::new();
    fx.write("records/g/a.md", "concept", "A", "[[records/g/b]]");
    fx.write("records/g/b.md", "concept", "B", "[[records/g/a]]");
    fx.reindex();

    let seed = fx.p("records/g/a.md");
    let slice = neighborhood(&fx.store, &seed, 10, &[], Direction::Both).unwrap();
    let reached: Vec<_> = slice.nodes.iter().map(|node| node.path.clone()).collect();

    // Starting at `a`, the only other page is `b`; the seed itself is excluded.
    assert_eq!(paths(&reached), vec!["records/g/b"]);
}
2394
2395 // ── orphans ──────────────────────────────────────────────────────────────
2396
#[test]
fn orphans_finds_files_with_no_edges_either_direction() {
    let fx = Fixture::new();

    // A wired pair: `a` links to `b`, so `a` has an outgoing edge and `b` an
    // incoming one.
    let link_to_b = "[[records/profiles/b]]";
    fx.write("records/profiles/a.md", "profile", "A", link_to_b);
    fx.write("records/profiles/b.md", "profile", "B", "");

    // The lone email neither links out nor is linked to.
    let lonely_body = "Just text, no links.";
    fx.write("sources/emails/lonely.md", "email", "Lonely email", lonely_body);

    let found = orphans(&fx.store, None).unwrap();
    let expected = vec!["sources/emails/lonely.md"];
    assert_eq!(paths(&found), expected);
}
2418
#[test]
fn orphans_file_with_only_broken_outgoing_link_is_orphan() {
    // A link whose target does not exist is a validation issue, not a graph
    // edge to another store file. A page whose sole link dangles is therefore
    // still an orphan.
    let fx = Fixture::new();
    let dangling = "[[records/contacts/ghost]]";
    fx.write("records/profiles/a.md", "profile", "A", dangling);

    let got = orphans(&fx.store, None).unwrap();
    let listed = paths(&got);
    let wanted = String::from("records/profiles/a.md");
    assert!(
        listed.contains(&wanted),
        "broken outgoing links must not wire the graph: {got:?}"
    );
}
2436
#[test]
fn orphans_file_with_only_incoming_is_not_orphan() {
    let fx = Fixture::new();

    // `target` links nowhere itself, but `linker` points at it.
    fx.write("records/contacts/target.md", "contact", "Target", "");
    let link_to_target = "[[records/contacts/target]]";
    fx.write("records/profiles/linker.md", "profile", "Linker", link_to_target);

    let got = orphans(&fx.store, None).unwrap();
    let listed = paths(&got);

    // An incoming edge alone is enough to keep `target` off the list.
    let target = String::from("records/contacts/target.md");
    assert!(
        !listed.contains(&target),
        "incoming-only is not an orphan: {got:?}"
    );

    // `linker` owns the outgoing edge, so it is not an orphan either.
    let linker = String::from("records/profiles/linker.md");
    assert!(!listed.contains(&linker));
}
2456
#[test]
fn orphans_incoming_link_from_other_layer_unorphans() {
    let fx = Fixture::new();

    // The candidate lives in records/ and its only incoming edge originates in
    // sources/. That cross-layer link must still un-orphan it, even though the
    // query below is scoped to records.
    fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
    let link_to_sarah = "[[records/contacts/sarah]]";
    fx.write("sources/emails/sarah.md", "email", "bio", link_to_sarah);

    // A true records/ orphan, so the scoped query demonstrably returns something.
    fx.write("records/contacts/nemo.md", "contact", "Nemo", "");

    let scoped = orphans(&fx.store, Some(Layer::Records)).unwrap();
    let expected = vec!["records/contacts/nemo.md"];
    assert_eq!(paths(&scoped), expected);
}
2474
#[test]
fn orphans_layer_scope_filters_candidates() {
    let fx = Fixture::new();

    // Three link-free files spread over both layers: one source, plus two
    // records (an atomic contact and a conclusion `profile`, the former
    // wiki-page).
    fx.write("sources/emails/s.md", "email", "S", "no links");
    fx.write("records/contacts/r.md", "contact", "R", "");
    fx.write("records/profiles/w.md", "profile", "W", "");

    // Scoped to records: only the two records-layer orphans remain.
    let records_expected = vec!["records/contacts/r.md", "records/profiles/w.md"];
    let records_only = orphans(&fx.store, Some(Layer::Records)).unwrap();
    assert_eq!(paths(&records_only), records_expected);

    // Scoped to sources: only the email remains.
    let sources_expected = vec!["sources/emails/s.md"];
    let sources_only = orphans(&fx.store, Some(Layer::Sources)).unwrap();
    assert_eq!(paths(&sources_only), sources_expected);

    // Unscoped: all three, in sorted order (records, records, sources).
    let everything_expected = vec![
        "records/contacts/r.md",
        "records/profiles/w.md",
        "sources/emails/s.md",
    ];
    let everything = orphans(&fx.store, None).unwrap();
    assert_eq!(paths(&everything), everything_expected);
}
2502
#[test]
fn orphans_self_link_does_not_count_as_an_edge() {
    // A page that references nothing but itself has no real edges, so it must
    // still be reported as an orphan.
    let fx = Fixture::new();
    let self_referencing = "I reference [[records/synthesis/solo]] only.";
    fx.write("records/synthesis/solo.md", "synthesis", "Solo", self_referencing);

    let found = orphans(&fx.store, None).unwrap();
    let expected = vec!["records/synthesis/solo.md"];
    assert_eq!(paths(&found), expected);
}
2516
#[test]
fn orphans_excludes_index_and_db_files() {
    // An unlinked index.md / DB.md is structural, not content, and must never
    // show up in the orphan list.
    // NOTE(review): only an index.md is written explicitly below; whether the
    // fixture itself supplies a DB.md is not visible from this test — confirm.
    let fx = Fixture::new();
    let index_page = "---\ntype: index\nscope: layer\nfolder: records\n---\n# records\n";
    fx.write_raw("records/index.md", index_page);

    // A genuine content orphan, so the expected list is non-empty.
    fx.write("records/profiles/real-orphan.md", "profile", "Real", "no links");

    let found = orphans(&fx.store, None).unwrap();
    let expected = vec!["records/profiles/real-orphan.md"];
    assert_eq!(paths(&found), expected);
}
2534
2535 // ── frontmatter_block helper ─────────────────────────────────────────────
2536
#[test]
fn frontmatter_block_extracts_between_fences() {
    // The block is exactly the text between the opening and closing `---` fences.
    let expected = Some("type: contact\nsummary: hi\n");
    let extracted = frontmatter_block("---\ntype: contact\nsummary: hi\n---\nbody here\n");
    assert_eq!(extracted, expected);
}
2545
#[test]
fn frontmatter_block_none_without_leading_fence() {
    // Text that does not open with a fence has no frontmatter block at all.
    let extracted = frontmatter_block("no frontmatter here\n");
    assert_eq!(extracted, None);
}
2551
#[test]
fn frontmatter_block_tolerates_leading_bom() {
    // Regression (finding #19 cross-module): a UTF-8 BOM sitting in front of
    // the opening fence must not hide the frontmatter from the graph layer,
    // or a BOM-prefixed file that the catalog indexes would contribute no
    // backlinks/edges. Before the fix, stripping `---\n` failed on the BOM
    // and the helper returned None.
    let with_bom = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody here\n";
    let expected = Some("type: contact\nsummary: hi\n");
    let extracted = frontmatter_block(with_bom);
    assert_eq!(
        extracted, expected,
        "a leading BOM must not hide frontmatter from the graph layer"
    );
}
2565
2566 // ── shared edge notion: whitespace / fence / case / containment ──────────
2567
/// A whitespace-padded `[[ x ]]` link counts as a forward edge and, once the
/// store is reindexed, as the matching backward edge — both views report the
/// same edge in a clean store.
#[test]
fn padded_link_is_both_a_forward_and_backward_edge() {
    let fx = Fixture::new();
    fx.write("records/contacts/sarah.md", "contact", "Sarah", "the contact");
    let padded = "See [[ records/contacts/sarah ]] today.";
    fx.write("records/profiles/a.md", "profile", "A", padded);
    fx.reindex();

    let outgoing = forwardlinks(&fx.store, Path::new("records/profiles/a.md")).unwrap();
    assert_eq!(
        paths(&outgoing),
        vec!["records/contacts/sarah"],
        "padded link is a forward edge"
    );

    let incoming = backlinks(&fx.store, Path::new("records/contacts/sarah.md")).unwrap();
    assert_eq!(
        paths(&incoming),
        vec!["records/profiles/a"],
        "padded link is the SAME backward edge (forward and backward agree)"
    );
}
2598
/// A `[[...]]` that appears only inside a fenced code block is a documentation
/// example rather than an edge: it yields no forward edge and no backward
/// edge, and the page holding it is an orphan because it has no real links.
/// This mirrors validate's fence-aware extractor.
#[test]
fn fenced_link_is_not_an_edge_and_page_is_orphan() {
    let fx = Fixture::new();
    fx.write("records/contacts/sarah.md", "contact", "Sarah", "the contact");
    let fenced_example = "```markdown\n[[records/contacts/sarah]] is how you link.\n```";
    fx.write("records/synthesis/howto.md", "synthesis", "Howto", fenced_example);
    fx.reindex();

    let outgoing = forwardlinks(&fx.store, Path::new("records/synthesis/howto.md")).unwrap();
    assert!(outgoing.is_empty(), "a fenced example is not a forward edge");

    let incoming = backlinks(&fx.store, Path::new("records/contacts/sarah.md")).unwrap();
    assert!(incoming.is_empty(), "a fenced example is not a backward edge");

    let orphan_set = paths(&orphans(&fx.store, None).unwrap());
    let howto = String::from("records/synthesis/howto.md");
    assert!(
        orphan_set.contains(&howto),
        "a page whose only link is fenced has no real edges => orphan: {orphan_set:?}"
    );
}
2637
/// `rename` must leave a `[[...]]` inside a fenced code block alone (it is
/// verbatim documentation, not an edge) while still retargeting real links.
#[test]
fn rewrite_links_to_leaves_fenced_examples_untouched() {
    let old_target = Path::new("records/contacts/sarah");
    let new_target = Path::new("records/contacts/sarah-chen");
    let document = "\
Real [[records/contacts/sarah]] link.

```markdown
Example: [[records/contacts/sarah]] inside a fence.
```

Trailing [[records/contacts/sarah]].
";
    let got = rewrite_links_to(document, old_target, new_target);

    // Both links outside the fence are retargeted.
    let retargeted_before = "Real [[records/contacts/sarah-chen]] link.";
    assert!(
        got.contains(retargeted_before),
        "real link before the fence must retarget"
    );
    let retargeted_after = "Trailing [[records/contacts/sarah-chen]].";
    assert!(
        got.contains(retargeted_after),
        "real link after the fence must retarget"
    );

    // The link inside the fence is carried over verbatim.
    let untouched = "Example: [[records/contacts/sarah]] inside a fence.";
    assert!(
        got.contains(untouched),
        "fenced example must stay verbatim, got:\n{got}"
    );
}
2670
/// A whitespace-padded link is still matched by `rewrite_links_to`, and its
/// display text survives the rewrite.
#[test]
fn rewrite_links_to_matches_padded_link() {
    let old_target = Path::new("records/contacts/sarah");
    let new_target = Path::new("records/contacts/sarah-chen");
    let padded = "See [[ records/contacts/sarah |Sarah]] today.";

    let rewritten = rewrite_links_to(padded, old_target, new_target);

    assert_eq!(rewritten, "See [[records/contacts/sarah-chen|Sarah]] today.");
}
2681
/// When the filesystem is case-insensitive, a link that differs from its
/// target only by letter case is the same edge: backlinks reports it, orphans
/// does NOT wrongly orphan the target, and rename rewrites it. When the
/// filesystem is case-sensitive the link really names a different target, so
/// the test returns early without asserting anything.
#[cfg(unix)]
#[test]
fn case_variant_link_is_one_edge_on_case_insensitive_fs() {
    // Probe via `link_edge_key` (imported at module scope), the same way the
    // production code does: if `A` and `a` key differently, the case-variant
    // link is a different target and this scenario does not apply.
    let keys_fold_case = link_edge_key("A") == link_edge_key("a");
    if !keys_fold_case {
        return;
    }

    let fx = Fixture::new();
    fx.write("records/contacts/sarah-chen.md", "contact", "Sarah", "the contact");
    let case_variant = "See [[records/contacts/Sarah-Chen]].";
    fx.write("records/profiles/bio.md", "profile", "Bio", case_variant);
    fx.reindex();

    // backlinks: the case-variant link is found.
    let incoming = backlinks(&fx.store, Path::new("records/contacts/sarah-chen.md")).unwrap();
    assert_eq!(
        paths(&incoming),
        vec!["records/profiles/bio"],
        "case-variant incoming link must be a backward edge"
    );

    // orphans: the linked-to contact is not listed.
    let orphan_set = paths(&orphans(&fx.store, None).unwrap());
    let contact = String::from("records/contacts/sarah-chen.md");
    assert!(
        !orphan_set.contains(&contact),
        "a target with a live case-variant incoming link must NOT be orphaned: {orphan_set:?}"
    );

    // rename: the case-variant link is retargeted.
    let old_target = Path::new("records/contacts/sarah-chen");
    let new_target = Path::new("records/contacts/sarah");
    let rewritten = rewrite_links_to(case_variant, old_target, new_target);
    assert_eq!(
        rewritten, "See [[records/contacts/sarah]].",
        "rename must rewrite the case-variant link on a case-insensitive FS"
    );
}
2732
/// REGRESSION (Unicode encoding / silent graph break): when a file name is
/// written in one Unicode normalization form and an incoming link in the
/// OTHER, the two must count as ONE edge. On macOS/APFS both spellings name
/// the same file (the FS folds NFC/NFD), so the string-keyed graph has to
/// agree. Before the fix `link_edge_key` only case-folded (no NFC), so
/// `backlinks` came back empty and `orphans` flagged the linked-to file even
/// though `validate` considered the link live. Keying both sides by NFC
/// unifies them.
///
/// The test runs on every platform: the file is written NFC and linked NFD
/// (both representable in any filename), and `link_edge_key` normalizes
/// unconditionally, so the assertions hold whatever the host FS folds.
#[test]
fn nfc_nfd_cross_normalization_link_is_one_edge() {
    let fx = Fixture::new();

    // On disk: NFC `josé` (é = U+00E9).
    fx.write("records/contacts/jos\u{00e9}.md", "contact", "Jose", "the contact");

    // Linking page: NFD `josé` (e + U+0301) — different bytes, same name.
    let nfd_link = "Knows [[records/contacts/jose\u{0301}]].";
    fx.write("records/profiles/bio.md", "profile", "Bio", nfd_link);
    fx.reindex();

    // backlinks: the NFD link resolves to the NFC-named file.
    let incoming =
        backlinks(&fx.store, Path::new("records/contacts/jos\u{00e9}.md")).unwrap();
    assert_eq!(
        paths(&incoming),
        vec!["records/profiles/bio"],
        "an NFD incoming link must be a backward edge of the NFC-named file"
    );

    // orphans: the linked-to file is not listed.
    let orphan_set = paths(&orphans(&fx.store, None).unwrap());
    let nfc_contact = "records/contacts/jos\u{00e9}.md".to_string();
    assert!(
        !orphan_set.contains(&nfc_contact),
        "a target with a live cross-normalization incoming link must NOT be orphaned: \
         {orphan_set:?}"
    );

    // forwardlinks: the body link is a real forward edge, emitted in its
    // canonical, normalization-PRESERVING form — the NFD bytes as written,
    // not re-normalized to NFC — because `forwardlinks` output feeds
    // byte-faithful rewrites. Only the comparison KEY is NFC-folded.
    let outgoing = forwardlinks(&fx.store, &fx.p("records/profiles/bio.md")).unwrap();
    let fwd = paths(&outgoing);
    assert_eq!(
        fwd,
        vec!["records/contacts/jose\u{0301}"],
        "forwardlinks must emit the body link's canonical (NFD-preserving) target"
    );
}
2789
/// A wiki-link that escapes the store (`[[../outside/x]]`) is never a forward
/// edge, and a `neighborhood` seeded at the escaping page never reads or
/// traverses through the external file — closing the disclosure vector.
#[cfg(unix)]
#[test]
fn escaping_link_is_not_an_edge_and_neighborhood_does_not_escape() {
    let fx = Fixture::new();

    // A file placed OUTSIDE the store root that itself links into the store.
    let external_dir = fx.store.root.parent().unwrap().join("outside");
    fs::create_dir_all(&external_dir).unwrap();
    let external_page =
        "---\ntype: note\nsummary: TOPSECRET\n---\nLinks [[records/contacts/sarah]].\n";
    fs::write(external_dir.join("secret.md"), external_page).unwrap();

    fx.write("records/contacts/sarah.md", "contact", "Sarah", "the contact");
    let escaping = "See [[../outside/secret]].";
    fx.write("records/concepts/traversal.md", "concept", "Traversal", escaping);
    fx.reindex();

    let seed = Path::new("records/concepts/traversal.md");

    // The escaping target produces no forward edge.
    let outgoing = forwardlinks(&fx.store, seed).unwrap();
    assert!(
        outgoing.is_empty(),
        "an escaping `[[../outside/secret]]` must not be a forward edge"
    );

    // A neighborhood seeded at the escaping page reaches nothing via the
    // external file, which is never read or traversed.
    let slice = neighborhood(&fx.store, seed, 2, &[], Direction::Outgoing).unwrap();
    let touches_outside = slice
        .nodes
        .iter()
        .any(|node| node.path.to_string_lossy().contains("outside"));
    assert!(
        !touches_outside,
        "neighborhood must not read/traverse the external file: {:?}",
        slice.nodes
    );
}
2846
/// REGRESSION (path-safety / info-disclosure): a wiki-link target built solely
/// from `Normal` path components, but routed through a **symlink** that points
/// outside the store, must NOT leak the out-of-store file's `summary`/`type`
/// into a `neighborhood` slice. Two shapes are covered:
///   (a) a symlinked DIRECTORY component (`records/linkdir -> /external/dir`,
///       link `[[records/linkdir/secret]]`), and
///   (b) a directly-symlinked `.md` (`records/aliased.md -> /external/secret.md`,
///       link `[[records/aliased]]`).
/// Both used to slip past the all-`Normal`-components fast path in
/// `resolves_within_store`, which returned `true` without canonicalizing; as a
/// result `store.root.join(rel)` followed the in-store symlink, `is_file()`
/// succeeded, and the external file was read. The fix sends every candidate
/// through the symlink-resolving `ensure_path_within_store`, so these resolve
/// to NO out-of-store node — exactly like the `..` escape control. A
/// legitimate in-store link still resolves, proving the gate does not
/// over-block.
#[cfg(unix)]
#[test]
fn symlinked_normal_component_does_not_disclose_out_of_store_file() {
    use std::os::unix::fs::symlink;

    let fx = Fixture::new();

    // The secret sits OUTSIDE the store root, in a sibling directory.
    let external_dir = fx.store.root.parent().unwrap().join("secret");
    fs::create_dir_all(&external_dir).unwrap();
    let external_file = external_dir.join("secret.md");
    fs::write(
        &external_file,
        "---\ntype: contact\nsummary: TOP SECRET\n---\n# x\n",
    )
    .unwrap();

    // A genuine in-store target, used to show the gate does not over-block.
    fx.write("records/contacts/real.md", "contact", "Real Contact", "");

    // (a) symlinked DIRECTORY component: records/linkdir -> <outside>/secret
    symlink(&external_dir, fx.store.root.join("records/linkdir")).unwrap();
    let seed_body = "[[records/linkdir/secret]] and the in-store [[records/contacts/real]].";
    fx.write("records/contacts/seed.md", "contact", "Seed", seed_body);

    // (b) directly-symlinked .md: records/aliased.md -> <outside>/secret.md
    symlink(&external_file, fx.store.root.join("records/aliased.md")).unwrap();
    fx.write("records/contacts/seed2.md", "contact", "Seed2", "[[records/aliased]]");
    fx.reindex();

    // (a): the symlinked-dir target must be absent; the in-store link present.
    let seed = fx.p("records/contacts/seed.md");
    let slice = neighborhood(&fx.store, &seed, 1, &[], Direction::Outgoing).unwrap();

    let leaks_summary = slice.nodes.iter().any(|node| node.summary == "TOP SECRET");
    assert!(
        !leaks_summary,
        "a symlinked-dir component must not disclose the out-of-store summary: {:?}",
        slice.nodes
    );

    let has_linkdir_node = slice
        .nodes
        .iter()
        .any(|node| node.path.to_string_lossy().contains("linkdir"));
    assert!(
        !has_linkdir_node,
        "the symlinked-out-of-store target must not be a node: {:?}",
        slice.nodes
    );

    let real = fx.p("records/contacts/real");
    let has_real_node = slice.nodes.iter().any(|node| node.path == real);
    assert!(
        has_real_node,
        "the legitimate in-store link must still resolve (gate did not over-block): {:?}",
        slice.nodes
    );

    // (b): the directly-symlinked .md target must disclose nothing at all.
    let seed2 = fx.p("records/contacts/seed2.md");
    let slice2 = neighborhood(&fx.store, &seed2, 1, &[], Direction::Outgoing).unwrap();
    assert!(
        slice2.nodes.is_empty(),
        "a directly-symlinked .md pointing outside the store must yield no node: {:?}",
        slice2.nodes
    );
}
2949
#[test]
fn regression_non_utf8_linker_edges_survive_scoped_backlinks_and_orphans() {
    // Adversarial review #10: a content file holding a stray non-UTF8 byte
    // next to a valid ASCII `[[...]]` line must still expose its edges. The
    // unscoped backlink scanner reads bytes lossily, whereas
    // `forwardlinks`/`orphans` used `read_to_string` and lost EVERY edge on
    // `InvalidData` — so scoped backlinks under-reported relative to
    // unscoped, and `orphans` flagged BOTH endpoints of a live edge.
    let fx = Fixture::new();
    fx.write("records/contacts/sarah.md", "contact", "Sarah", "# Sarah");

    // bio.md: the frontmatter is valid UTF-8, but the BODY line carries a 0xE9
    // byte (Latin-1 'é', invalid as standalone UTF-8) right beside the link
    // to sarah.
    let mut raw: Vec<u8> =
        b"---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Bio\n---\n"
            .to_vec();
    raw.extend_from_slice(b"See [[records/contacts/sarah]] caf");
    raw.push(0xE9);
    raw.push(b'\n');

    let bio_abs = fx.store.root.join("records/profiles/bio.md");
    fs::create_dir_all(bio_abs.parent().unwrap()).unwrap();
    fs::write(&bio_abs, &raw).unwrap();
    fx.reindex();

    let sarah = fx.p("records/contacts/sarah");
    let bio = fx.p("records/profiles/bio");

    // forwardlinks copes with the non-UTF8 file and still reports the edge.
    let fwd = paths(&forwardlinks(&fx.store, &bio).unwrap());
    let fwd_has_sarah = fwd.iter().any(|p| p.contains("sarah"));
    assert!(
        fwd_has_sarah,
        "forwardlinks must extract the edge from a non-UTF8 file: {fwd:?}"
    );

    // Scoped backlinks (which ride `forwardlinks`) must AGREE with unscoped.
    let unscoped = paths(&backlinks(&fx.store, &sarah).unwrap());
    let type_filter = ["profile".to_string()];
    let scoped = paths(&backlinks_filtered(&fx.store, &sarah, &type_filter, None).unwrap());
    let unscoped_has_bio = unscoped.iter().any(|p| p.contains("bio"));
    assert!(
        unscoped_has_bio,
        "unscoped backlinks must include bio: {unscoped:?}"
    );
    let scoped_has_bio = scoped.iter().any(|p| p.contains("bio"));
    assert!(
        scoped_has_bio,
        "scoped backlinks must agree with unscoped on the non-UTF8 linker: {scoped:?}"
    );

    // Neither end of the live edge may be listed as an orphan.
    let orph = paths(&orphans(&fx.store, None).unwrap());
    let endpoint_orphaned = orph
        .iter()
        .any(|p| p.contains("bio") || p.contains("sarah"));
    assert!(
        !endpoint_orphaned,
        "neither endpoint of a live edge may be an orphan: {orph:?}"
    );
}
3004 }
3005}