dbmd_core/graph.rs
1//! `graph` — the wiki-link **relationship layer**.
2//!
3//! Wiki-links are curated-relevance edges (the LLM wrote them), so the graph's
4//! job is to **assemble the relevant context around a seed**, not to be
5//! analyzed. **All ops are on-demand — there is no maintained graph** (a
6//! persistent graph is the roadmap engine).
7//!
8//! [`backlinks`] / [`forwardlinks`] are loop ops (O(changed), never O(store)).
9//! [`neighborhood`] is the high-value context-hydration op. [`orphans`] is a
10//! SWEEP curation worklist.
11//!
12//! Whole-graph analytics (connected components, cycle detection, shortest
13//! path, sinks/sources, DOT/JSON export) are deliberately **not** here — a
14//! human studying the graph opens the store in Obsidian; broken-link detection
15//! is [`crate::validate`]'s job (`WIKI_LINK_BROKEN`).
16//!
17//! ## Implementation note — two paths for the incoming-edge scan
18//!
19//! The scale contract (SPEC § Tooling, plan: *"the interactive loop is
20//! O(changed), never O(store)"*) is the load-bearing rule here. [`backlinks`]
21//! is a loop op, so it must **not** open and `read_to_string` every content file
22//! in the store on each call. It resolves incoming edges by one of two paths,
23//! chosen by whether the call is scoped:
24//!
25//! - **Unscoped** (`dbmd graph backlinks <x>`, no `--type`/`--in`): one
26//! embedded-ripgrep pass for the literal `[[<target>]]` over the tree, via
27//! [`Store::find_links_to`] (`grep` + `ignore`, early-exit per file) — the
28//! same scan engine [`crate::validate`]'s working-set incoming-linker step
29//! uses. A single store traversal with cheap presence-only matching, not N
30//! whole-file parses; that is what keeps the unscoped call inside the loop
31//! budget. [`backlinks`] then filters the raw hits to content files and emits
32//! canonical bare targets (its relationship view), where the lower-level
33//! [`Store::find_links_to`] returns every `.md` the text appears in.
34//! - **Scoped** (`--type` / `--in`): the candidate set is enumerated from the
35//! relevant layer's `index.jsonl` sidecars — the sidecars of the one layer the
36//! `--type` belongs to (via [`Store::sidecar_records`]), filtered to that type
37//! — and each candidate is confirmed by a single-file parse. That is what makes
38//! `--type` / `--in` an *I/O* scope, not just a result filter: a typed/layer-scoped
39//! `backlinks` reads only the relevant layer's sidecars (O(entities-in-layer))
40//! and parses only those files. A type's records can span several folders within
41//! its layer (`wiki-page` under any `wiki/<topic>/`), so the read is layer-wide,
42//! not a single canonical folder — otherwise off-canonical-folder linkers would
43//! be silently dropped.
44//!
45//! **Why the scoped path confirms by parsing the candidate, not by trusting the
46//! sidecar's `links` field.** A sidecar record's `links` is the file's
47//! *frontmatter* `links:` list only — it does **not** capture wiki-links written
48//! in the body or inside other typed frontmatter fields (`company: [[…]]`,
49//! `attendees: [ … ]`, `derived_from: [ … ]`). [`forwardlinks`] extracts edges
50//! from the whole file, so to keep the two directions on the **same** edge set
51//! (an incoming edge to X is exactly: some file whose [`forwardlinks`] contains
52//! X) the incoming-edge confirmation re-parses each candidate file the same way.
53//! The sidecar bounds *which* files are candidates; the parse decides whether
54//! each truly links. The unscoped ripgrep path stays on that same edge set by
55//! matching the link text wherever it lives in the file (frontmatter or body).
56//! A node's `summary` / `type` likewise read frontmatter directly (the source of
57//! truth the sidecar is derived from; never stale).
58
59use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
60use std::io;
61use std::path::{Path, PathBuf};
62
63use ignore::WalkBuilder;
64
65use crate::index::IndexRecord;
66use crate::store::{
67 canonical_link_target, ensure_path_within_store, extract_edge_targets, fence_closes,
68 fence_opens, layer_for_type, link_edge_key, Layer, Store, StoreError,
69};
70
/// The edge direction(s) a graph traversal is allowed to follow.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Direction {
    /// Follow only edges that point *at* the current node (its backlinks).
    Incoming,
    /// Follow only edges that leave the current node (its forwardlinks).
    Outgoing,
    /// Follow incoming and outgoing edges alike.
    Both,
}
81
82/// One node reached during a [`neighborhood`] hydration: the file, its
83/// `summary`, and how it connects back toward the seed.
84#[derive(Debug, Clone, PartialEq, Eq)]
85pub struct ContextNode {
86 /// The store-relative path of the reached file.
87 pub path: PathBuf,
88 /// The file's `summary` (read from its sidecar entry / frontmatter).
89 pub summary: String,
90 /// The file's `type`, when known.
91 pub type_: Option<String>,
92 /// Hop distance from the seed (the seed itself is 0).
93 pub hops: u32,
94 /// The relationship edge that brought this node into the slice: the path it
95 /// links to/from one hop closer to the seed, and the direction.
96 pub via: Option<(PathBuf, Direction)>,
97}
98
99/// The readable working-set digest [`neighborhood`] returns: the seed plus the
100/// reached nodes with their summaries and connections. The relationship-axis
101/// "turn a seed into context" primitive.
102#[derive(Debug, Clone, PartialEq, Eq)]
103pub struct ContextSlice {
104 /// The seed the slice was hydrated from.
105 pub seed: PathBuf,
106 /// The reached nodes (excluding the seed), in BFS order.
107 pub nodes: Vec<ContextNode>,
108}
109
110/// Incoming edges to `path`: files that wiki-link to it. The blast-radius /
111/// dependents primitive before an edit. Store-wide (every layer / every type);
112/// see [`backlinks_filtered`] for the `--type` / `--in`-scoped form.
113///
114/// `path` is the store-relative target as it would be written inside a
115/// wiki-link (with or without a trailing `.md`; both resolve to the same
116/// target). Returns each linking file as its **canonical bare wiki-link path**
117/// (store-relative, no `.md`) — the same key [`forwardlinks`] emits, so the two
118/// directions round-trip and [`neighborhood`] can use one node identity.
119/// Deduped, sorted, never including the seed itself.
120pub fn backlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
121 backlinks_filtered(store, path, &[], None)
122}
123
124/// Incoming edges to `path`, scoped by the linking file's `type` and/or layer —
125/// the `dbmd graph backlinks --type/--in` surface.
126///
127/// **Scale (the loop contract).** Two paths, by whether the call is scoped:
128///
129/// - **Unscoped** (`types` empty *and* `layer` `None`): one embedded-ripgrep
130/// pass for `[[<target>]]` across the store via [`Store::find_links_to`] — a
131/// single `grep` + `ignore` traversal with early-exit per file, never a
132/// `read_to_string` of every content file. This is the same scan engine
133/// [`crate::validate::validate_working_set`]'s incoming-linker step rides, and
134/// it keeps the unscoped call inside the loop budget (the old per-candidate
135/// confirm-read re-opened every file in the store → O(store)).
136/// - **Scoped** (`types` and/or `layer` set): the candidate set — the files that
137/// *might* link to `path` — is read from the relevant layer's `index.jsonl`
138/// sidecars, so the call touches only the named layer(s): O(entities-in-layer),
139/// the sanctioned loop cost. Each candidate is then confirmed by a single-file
140/// parse. When `types` lists several types, the sidecars of each type's layer
141/// are read and the candidate sets unioned (filtered to the type), so a type
142/// whose records span multiple folders within its layer (e.g. `wiki-page` under
143/// any `wiki/<topic>/`) is fully covered; a `layer` further restricts the
144/// candidate paths to that layer.
145///
146/// **Correctness (one edge set, both paths).** An incoming edge to X is exactly:
147/// some file whose [`forwardlinks`] contains X — a wiki-link in the body or in
148/// *any* frontmatter field (`company: [[…]]`, `attendees: [ … ]`), not just the
149/// sidecar's frontmatter `links:` projection. Both paths honor that:
150/// - The unscoped scan matches the literal `[[<target>]]` text wherever it lives
151/// in a file (frontmatter or body), the same edges [`forwardlinks`] extracts.
152/// [`Store::find_links_to`] returns *every* `.md` carrying the link text
153/// (including `index.md` catalogs); [`backlinks`] is the relationship view, so
154/// the results are filtered to content files ([`is_content_rel`]) and emitted
155/// as canonical bare targets, self-excluded.
156/// - The scoped path confirms each candidate via [`file_links_to`], which
157/// delegates to [`forwardlinks`] (body + every frontmatter field) — so a
158/// body-only or typed-field edge is caught, not just the sidecar's `links:`
159/// list.
160///
161/// Result form (canonical bare paths, deduped, sorted, seed excluded) is
162/// identical on both paths and matches [`backlinks`].
163pub fn backlinks_filtered(
164 store: &Store,
165 path: &Path,
166 types: &[String],
167 layer: Option<Layer>,
168) -> Result<Vec<PathBuf>, StoreError> {
169 let target = normalize_target(path);
170 if target.is_empty() {
171 return Ok(Vec::new());
172 }
173 let target_key = edge_key(&target);
174
175 // Unscoped: one content pass over the store (O(store) scan with early-exit
176 // per file), not a per-candidate read of every content file. `find_links_to`
177 // returns every `.md` carrying an edge to the target (incl. catalog
178 // `index.md`); narrow to content files and canonicalize to the bare target
179 // form `backlinks` emits, dropping the seed's self-link.
180 if types.is_empty() && layer.is_none() {
181 let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
182 for rel in store.find_links_to(path)? {
183 if !is_content_rel(&rel) {
184 continue;
185 }
186 let linker = normalize_target(&rel);
187 if linker.is_empty() || edge_key(&linker) == target_key {
188 // A file never counts as its own backlink (case-folded so a
189 // case-variant self-link is still excluded).
190 continue;
191 }
192 hits.insert(PathBuf::from(linker));
193 }
194 return Ok(hits.into_iter().collect());
195 }
196
197 // Scoped: read only the named folder(s)' sidecars for the candidate set, then
198 // confirm each candidate with a single-file parse — O(folder), the I/O scope
199 // `--type` / `--in` buys.
200 let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
201 for candidate in candidate_records(store, types, layer)? {
202 let rel = &candidate.path;
203 let candidate_target = normalize_target(rel);
204 if candidate_target.is_empty() || edge_key(&candidate_target) == target_key {
205 // A file never counts as its own backlink.
206 continue;
207 }
208 // Confirm the edge by parsing the candidate file the same way
209 // forwardlinks does (body + all frontmatter), so body/typed-field links
210 // are caught — the sidecar's `links` field alone would miss them.
211 if file_links_to(store, rel, &target)? {
212 hits.insert(PathBuf::from(candidate_target));
213 }
214 }
215
216 Ok(hits.into_iter().collect())
217}
218
219/// Outgoing edges from `path`: the wiki-link targets extracted from that single
220/// file. Loop-fast; follow the evidence chain.
221///
222/// `path` is the store-relative path of the file to read. Targets are returned
223/// as store-relative paths (bare, no `.md`), deduped and sorted; the file's
224/// links to itself are dropped. A missing file yields an empty list (a
225/// dangling seed has no outgoing edges to report — broken-link detection is
226/// [`crate::validate`]'s job).
227pub fn forwardlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
228 let self_key = edge_key(&normalize_target(path));
229 let abs = match resolve_existing(store, path) {
230 Some(a) => a,
231 None => return Ok(Vec::new()),
232 };
233 let body = match std::fs::read_to_string(&abs) {
234 Ok(b) => b,
235 // A file that isn't valid UTF-8 (e.g. a binary source) carries no
236 // wiki-links we can extract.
237 Err(e) if e.kind() == io::ErrorKind::InvalidData => return Ok(Vec::new()),
238 Err(e) => return Err(StoreError::Io(e)),
239 };
240
241 let mut out: BTreeSet<PathBuf> = BTreeSet::new();
242 for target in extract_link_targets(&body) {
243 // Self-link drop is case-folded so a case-variant self-reference is also
244 // excluded on a case-insensitive filesystem.
245 if target.is_empty() || edge_key(&target) == self_key {
246 continue;
247 }
248 out.insert(PathBuf::from(target));
249 }
250 Ok(out.into_iter().collect())
251}
252
253/// The candidate set for an incoming-edge scan: the sidecar records that could
254/// link to the target, read from the type-folder `index.jsonl` sidecars (never
255/// a content-tree walk). `types`/`layer` narrow *which* sidecars are read — the
256/// I/O scope that keeps a typed/layer backlinks O(entities-in-layer).
257///
258/// - `types` non-empty: for each type, read **the whole layer** the type belongs
259/// to ([`layer_for_type`] → [`Store::sidecar_records`]) and keep the records of
260/// that `type`, unioned by path across the requested types. A `layer` filter,
261/// when given, intersects with the type's own layer (a type lives in exactly
262/// one layer, so a mismatched `--in` simply yields no candidates).
263/// - `types` empty: every sidecar record under `layer` (or store-wide when
264/// `None`) via [`Store::sidecar_records`].
265///
266/// **Why the whole layer, not just the type's canonical folder.** A `type` can
267/// legitimately span several folders within one layer — `wiki-page` is the
268/// canonical case (SPEC files it under `wiki/<topic>/` for an *arbitrary* topic:
269/// `wiki/topics/`, `wiki/people/`, `wiki/projects/`, …). Reading only the
270/// single canonical-guess folder (`wiki/topics/`) would silently drop every
271/// wiki-page filed elsewhere in the layer, so a scoped `backlinks --type
272/// wiki-page` would under-report dependents the moment that canonical folder
273/// exists — breaking the docstring's promise that the scoped edge set equals the
274/// unscoped one. Reading the type's full layer subtree and filtering by `type`
275/// is complete and still O(entities-in-layer), the sanctioned loop scope.
276fn candidate_records(
277 store: &Store,
278 types: &[String],
279 layer: Option<Layer>,
280) -> Result<Vec<IndexRecord>, StoreError> {
281 if types.is_empty() {
282 return store.sidecar_records(layer);
283 }
284 let mut by_path: std::collections::BTreeMap<PathBuf, IndexRecord> =
285 std::collections::BTreeMap::new();
286 for type_ in types {
287 // A type lives in exactly one layer; read that whole layer's sidecars so
288 // a record filed under a non-canonical folder of the same type (e.g. a
289 // `wiki-page` under `wiki/people/` rather than `wiki/topics/`) is still a
290 // candidate. An explicit `--in` layer that disagrees with the type's
291 // layer can never match the type, so skip the read entirely.
292 let type_layer = layer_for_type(type_);
293 if let Some(scope) = layer {
294 if scope != type_layer {
295 continue;
296 }
297 }
298 for rec in store.sidecar_records(Some(type_layer))? {
299 if rec.type_ == *type_ {
300 by_path.insert(rec.path.clone(), rec);
301 }
302 }
303 }
304 Ok(by_path.into_values().collect())
305}
306
307/// True if the store file at `rel` carries a wiki-link whose canonical target
308/// equals `target`. Delegates to [`forwardlinks`] so the incoming-edge predicate
309/// is *exactly* the outgoing-edge extraction — body + every frontmatter field —
310/// keeping the two directions on one edge set. `forwardlinks` already emits
311/// canonical bare targets, so `target` (likewise normalized by the caller) is
312/// compared directly. A missing/binary file links to nothing.
313fn file_links_to(store: &Store, rel: &Path, target: &str) -> Result<bool, StoreError> {
314 let edges = forwardlinks(store, rel)?;
315 let target_key = edge_key(target);
316 // Compare on the case-folded edge key so a case-variant link (e.g.
317 // `[[records/contacts/Sarah-Chen]]` to `sarah-chen.md`) is confirmed on a
318 // case-insensitive filesystem, agreeing with the unscoped scan and validate.
319 Ok(edges
320 .iter()
321 .any(|e| edge_key(&e.to_string_lossy()) == target_key))
322}
323
324/// **Context hydration.** Bounded BFS from `seed` over backlinks + forwardlinks
325/// out to `hops`, reading each reached file's `summary` + relationship, and
326/// returning a readable [`ContextSlice`]. Optionally filtered by `types` and
327/// `direction`. On-demand; no maintained graph. What the agent reaches for to
328/// assemble a working set in one call.
329///
330/// Traversal semantics:
331/// - **`hops`** bounds true graph distance from the seed. `hops == 0` returns
332/// an empty slice (the seed alone is no context).
333/// - **`direction`** selects which edges are followed: `Incoming` walks
334/// backlinks, `Outgoing` walks forwardlinks, `Both` walks the union.
335/// - **`types`**, when non-empty, filters which reached nodes appear in the
336/// slice — but traversal still passes *through* off-type nodes, so a
337/// `meeting` two hops out is still reachable through a `contact` even when
338/// filtering to `meeting`. (An empty `types` slice imposes no filter.)
339/// - Each node records the lowest hop count at which it is first reached (BFS
340/// order); the seed is never included as a node.
341///
342/// Unbounded traversal: delegates to [`neighborhood_capped`] with no node cap, so
343/// it expands every reachable node within `hops`. For a densely-interlinked store
344/// this is one full-store backlinks scan **per reached node** (O(visited × store))
345/// — prefer [`neighborhood_capped`] with a `max_nodes` cap to bound that work.
346pub fn neighborhood(
347 store: &Store,
348 seed: &Path,
349 hops: u32,
350 types: &[String],
351 direction: Direction,
352) -> Result<ContextSlice, StoreError> {
353 neighborhood_capped(store, seed, hops, types, direction, None)
354}
355
356/// [`neighborhood`] with a hard cap on how many nodes the BFS **traverses**.
357///
358/// `max_nodes` bounds the *traversal*, not just the result: each node the BFS
359/// expands triggers a per-node incoming-edge scan (an unscoped [`backlinks`] is a
360/// full-store ripgrep pass), so an uncapped neighborhood of a hub node costs
361/// O(visited × store). A post-hoc `.take(n)` on the returned nodes caps the
362/// *output* but not that work — the scans still run for every reached node. This
363/// cap stops discovering (and therefore stops scanning) once `max_nodes` distinct
364/// non-seed nodes have entered the BFS, so the expensive per-node scans are bounded
365/// to at most `max_nodes` of them. `None` is unbounded (the [`neighborhood`]
366/// behavior).
367///
368/// The cap is applied at *discovery* in BFS order, so the kept nodes are exactly
369/// the first `max_nodes` reached (closest-first by hop), and each still records its
370/// true minimum hop distance. Type-filtered (off-type) nodes count against the cap
371/// because the BFS must still traverse *through* them to reach deeper on-type
372/// nodes — the scan cost is paid when a node is expanded, on- or off-type alike.
373pub fn neighborhood_capped(
374 store: &Store,
375 seed: &Path,
376 hops: u32,
377 types: &[String],
378 direction: Direction,
379 max_nodes: Option<usize>,
380) -> Result<ContextSlice, StoreError> {
381 let seed_rel = PathBuf::from(normalize_target(seed));
382 let type_filter: HashSet<&str> = types.iter().map(|s| s.as_str()).collect();
383
384 // `discovered` guards against revisiting a node (and against re-adding the
385 // seed). BFS by levels so the first time we reach a node is its true min
386 // hop distance.
387 let mut discovered: HashSet<PathBuf> = HashSet::new();
388 discovered.insert(seed_rel.clone());
389
390 let mut nodes: Vec<ContextNode> = Vec::new();
391 let mut frontier: VecDeque<PathBuf> = VecDeque::new();
392 frontier.push_back(seed_rel.clone());
393
394 // Count of distinct non-seed nodes admitted to the BFS. Once it hits
395 // `max_nodes` we stop discovering new nodes, which stops enqueuing them, which
396 // stops the per-node full-store backlinks scan they would have triggered — the
397 // cap bounds the *traversal cost*, not only the printed result.
398 let mut admitted = 0usize;
399 let cap_reached = |admitted: usize| max_nodes.is_some_and(|cap| admitted >= cap);
400
401 let mut hop = 0u32;
402 while hop < hops && !frontier.is_empty() && !cap_reached(admitted) {
403 hop += 1;
404 let level_size = frontier.len();
405 for _ in 0..level_size {
406 if cap_reached(admitted) {
407 break;
408 }
409 let current = frontier.pop_front().expect("frontier non-empty");
410
411 // Collect this node's edges in the requested direction(s). Each
412 // edge carries the neighbor path + the direction we traversed it.
413 let mut edges: Vec<(PathBuf, Direction)> = Vec::new();
414 if matches!(direction, Direction::Outgoing | Direction::Both) {
415 for nbr in forwardlinks(store, ¤t)? {
416 edges.push((nbr, Direction::Outgoing));
417 }
418 }
419 if matches!(direction, Direction::Incoming | Direction::Both) {
420 for nbr in backlinks(store, ¤t)? {
421 edges.push((nbr, Direction::Incoming));
422 }
423 }
424
425 for (neighbor, dir) in edges {
426 if cap_reached(admitted) {
427 break;
428 }
429 if !discovered.insert(neighbor.clone()) {
430 continue;
431 }
432 admitted += 1;
433 let (summary, type_) = read_summary_and_type(store, &neighbor);
434 let include = type_filter.is_empty()
435 || type_
436 .as_deref()
437 .map(|t| type_filter.contains(t))
438 .unwrap_or(false);
439 if include {
440 nodes.push(ContextNode {
441 path: neighbor.clone(),
442 summary,
443 type_,
444 hops: hop,
445 via: Some((current.clone(), dir)),
446 });
447 }
448 // Off-type nodes are not emitted but still seed the next BFS
449 // level, so the type filter narrows the *result*, not the
450 // reachable graph.
451 frontier.push_back(neighbor);
452 }
453 }
454 }
455
456 Ok(ContextSlice {
457 seed: seed_rel,
458 nodes,
459 })
460}
461
462/// **SWEEP.** Content files with no incoming AND no outgoing wiki-links — the
463/// curation worklist ("ingested but not yet wired into the wiki"). Off the
464/// loop. Optionally scoped to a layer.
465///
466/// A file is an orphan iff it neither links out to another store file nor is
467/// linked to by one. Incoming edges are counted across the *whole* store
468/// (a link from any layer un-orphans a file), even when `layer` scopes the
469/// candidate set. Returns store-relative paths, sorted.
470pub fn orphans(store: &Store, layer: Option<Layer>) -> Result<Vec<PathBuf>, StoreError> {
471 // One walk of the whole store: for every content file, record (a) whether
472 // it has any outgoing link, and (b) accumulate the set of every target any
473 // file links to (its incoming-edge set). Both come from a single read per
474 // file — the SWEEP cost.
475 let all = walk_content_files(store)?;
476
477 // `linked_to` holds case-folded edge KEYS (not raw paths): the link text may
478 // spell a target with different casing than the on-disk file (e.g.
479 // `[[records/contacts/Sarah-Chen]]` → `sarah-chen.md`), and on a
480 // case-insensitive filesystem that is a real incoming edge. Keying on
481 // `edge_key` so the incoming-edge lookup case-folds is what stops the
482 // false-positive orphan (a file with a live case-variant link reported as
483 // orphaned) — and matches validate, which resolves the same link via the
484 // case-insensitive filesystem.
485 let mut linked_to: HashSet<String> = HashSet::new();
486 let mut has_outgoing: HashMap<PathBuf, bool> = HashMap::new();
487
488 for abs in &all {
489 let rel = match rel_path(store, abs) {
490 Some(r) => r,
491 None => continue,
492 };
493 let self_key = edge_key(&normalize_target(&rel));
494
495 let body = match std::fs::read_to_string(abs) {
496 Ok(b) => b,
497 Err(e) if e.kind() == io::ErrorKind::InvalidData => String::new(),
498 Err(e) => return Err(StoreError::Io(e)),
499 };
500
501 let mut outgoing = false;
502 for target in extract_link_targets(&body) {
503 if target.is_empty() || edge_key(&target) == self_key {
504 continue;
505 }
506 if resolve_existing(store, Path::new(&target)).is_none() {
507 continue;
508 }
509 outgoing = true;
510 linked_to.insert(edge_key(&target));
511 }
512 has_outgoing.insert(rel, outgoing);
513 }
514
515 let mut out: BTreeSet<PathBuf> = BTreeSet::new();
516 for abs in &all {
517 let rel = match rel_path(store, abs) {
518 Some(r) => r,
519 None => continue,
520 };
521 if let Some(layer) = layer {
522 if path_layer(&rel) != Some(layer) {
523 continue;
524 }
525 }
526 let outgoing = has_outgoing.get(&rel).copied().unwrap_or(false);
527 let incoming = linked_to.contains(&edge_key(&normalize_target(&rel)));
528 if !outgoing && !incoming {
529 out.insert(rel);
530 }
531 }
532
533 Ok(out.into_iter().collect())
534}
535
536/// **Write-side.** Rewrite every incoming `[[old]]` wiki-link in `text` to
537/// `[[new]]`, preserving any `|display` override and emitting the canonical bare
538/// target (no `.md`). The write-side twin of [`backlinks`]: where `backlinks`
539/// *finds* the files carrying an edge to `old`, this *retargets* that edge to
540/// `new` inside one file's contents.
541///
542/// `old` and `new` are store-relative paths in the wiki-link sense — both are
543/// passed through the same [`normalize_target`] the read side keys on, so the
544/// `.md` and bare spellings of `old` collapse to one target and a match here is
545/// exactly a match [`backlinks`] / [`Store::find_links_to`](crate::Store::find_links_to)
546/// would report. A link is rewritten iff its normalized target equals
547/// `normalize_target(old)`; prefix collisions (`old=a/b` vs `[[a/bc]]`) and
548/// short-form links never match. Returns the rewritten text (identical to the
549/// input when nothing matched), so the caller can cheaply detect a no-op.
550///
551/// Operates on the raw text (not a parser round-trip) so a link in frontmatter
552/// or body is retargeted uniformly and nothing else is reflowed — **except** a
553/// `[[...]]` inside a ``` fenced code block, which is a documentation example,
554/// not an edge: `rename` must NOT mutate fenced verbatim content (validate
555/// treats fenced links as non-edges, so rewriting them silently corrupts the
556/// example and makes rename disagree with validate). Matching is fence-aware,
557/// whitespace-trimmed, and case-folded to the filesystem, the exact edge notion
558/// [`backlinks`]/[`forwardlinks`] use — so rename retargets precisely the edges
559/// those report and nothing else.
560pub fn rewrite_links_to(text: &str, old: &Path, new: &Path) -> String {
561 let old_target = normalize_target(old);
562 let new_target = normalize_target(new);
563 if old_target.is_empty() {
564 // No target to match → never rewrite anything.
565 return text.to_string();
566 }
567 let old_key = edge_key(&old_target);
568
569 let mut out = String::with_capacity(text.len());
570 // Track the fence as a `(char, run length)` exactly like validate and
571 // `extract_edge_targets` (NOT a bool toggled on any ``` / ~~~ line). The
572 // naive toggle flips mid-block on a nested/indented/long-run fence, so a
573 // fenced example link would be rewritten — corrupting documentation and
574 // making rename disagree with validate's edge notion.
575 let mut fence: Option<(u8, usize)> = None;
576 // `split_inclusive` keeps each line's trailing `\n`, so copying a chunk
577 // verbatim preserves the original line endings exactly.
578 for line in text.split_inclusive('\n') {
579 // The fence rules key on line content without trailing `\r`/`\n`; the
580 // full chunk (line endings intact) is what we copy verbatim.
581 let content = line.trim_end_matches('\n').trim_end_matches('\r');
582 if let Some(f) = fence {
583 // Inside a fenced code block: copy verbatim, never rewrite. Only a
584 // matching closing fence ends the block.
585 if fence_closes(content, f) {
586 fence = None;
587 }
588 out.push_str(line);
589 continue;
590 }
591 if let Some(opened) = fence_opens(content) {
592 fence = Some(opened);
593 out.push_str(line);
594 continue;
595 }
596 rewrite_links_in_line(line, &old_key, &new_target, &mut out);
597 }
598 out
599}
600
601/// Rewrite every `[[...]]` on a single (non-fenced) line whose target matches
602/// `old_key`, appending the result to `out`. Preserves any `|display` override
603/// verbatim and emits the canonical bare `new_target`. A `[[...]]` whose target
604/// does not match (a prefix sibling, the short form, an unrelated target) is
605/// copied through untouched.
606fn rewrite_links_in_line(line: &str, old_key: &str, new_target: &str, out: &mut String) {
607 let bytes = line.as_bytes();
608 let mut i = 0usize;
609 let mut last = 0usize;
610 while i + 1 < bytes.len() {
611 if bytes[i] == b'[' && bytes[i + 1] == b'[' {
612 if let Some(close) = line[i + 2..].find("]]") {
613 let inner = &line[i + 2..i + 2 + close];
614 // An embedded newline means this isn't a single-line link.
615 if !inner.contains('\n') {
616 let (raw_target, display) = match inner.split_once('|') {
617 Some((t, d)) => (t, Some(d)),
618 None => (inner, None),
619 };
620 let raw_target = raw_target.trim();
621 // Match on the SAME edge key the read side uses, so `[[old]]`,
622 // `[[old.md]]`, `[[ ./old ]]`, and (case-insensitive FS)
623 // `[[Old]]` all retarget while `[[old-jr]]` never does.
624 if !raw_target.is_empty()
625 && !raw_target.starts_with('[')
626 && edge_key(&canonical_link_target(raw_target)) == old_key
627 {
628 out.push_str(&line[last..i]);
629 out.push_str("[[");
630 out.push_str(new_target);
631 if let Some(display) = display {
632 out.push('|');
633 out.push_str(display);
634 }
635 out.push_str("]]");
636 i = i + 2 + close + 2;
637 last = i;
638 continue;
639 }
640 }
641 // Not a matching link: skip past this `]]` so an inner `[[`
642 // isn't re-scanned, but leave the text for the verbatim copy.
643 i = i + 2 + close + 2;
644 continue;
645 }
646 }
647 i += 1;
648 }
649 out.push_str(&line[last..]);
650}
651
652// ── Private helpers ─────────────────────────────────────────────────────────
653
654/// Normalize a store-relative path into the canonical wiki-link target form:
655/// forward slashes, no leading `./` or `/`, and no trailing `.md`. This is the
656/// canonical (case-PRESERVING) identity used for output and rewrites; edge
657/// *comparisons* go through [`edge_key`] so the `.md`/bare forms AND (on a
658/// case-insensitive filesystem) case-variant spellings of a target unify. The
659/// shared [`canonical_link_target`] is the single definition every db.md
660/// link op keys on.
661fn normalize_target(path: &Path) -> String {
662 canonical_link_target(&path.to_string_lossy())
663}
664
665/// The comparison key for an edge: the canonical target case-folded to the
666/// filesystem (identity on a case-sensitive FS, lowercased on macOS/Windows), so
667/// the string-keyed graph compares agree with the filesystem's case-insensitive
668/// `is_file()` resolution. `[[records/contacts/Sarah-Chen]]` and the on-disk
669/// `sarah-chen.md` must be the same edge on a case-insensitive filesystem or
670/// backlinks/orphans/rename silently disagree with validate.
671fn edge_key(canonical_target: &str) -> String {
672 link_edge_key(canonical_target)
673}
674
675/// Extract every wiki-link target from a body, normalized to the canonical
676/// store-relative form. Fence-aware and whitespace-trimmed via the shared
677/// [`extract_edge_targets`] — a `[[...]]` inside a ``` fenced code block is a
678/// documentation example, NOT an edge (matching validate), and `[[ x ]]`
679/// padding resolves identically to `[[x]]`. A target that would escape the store
680/// root (a `..` component) is dropped here too, so an escaping `[[../outside/x]]`
681/// is never reported as a forward edge and never seeds a [`neighborhood`]
682/// traversal out of the store (the disclosure vector validate flags as an
683/// error). Order-preserving; duplicates kept (callers dedup).
684fn extract_link_targets(body: &str) -> Vec<String> {
685 extract_edge_targets(body)
686 .into_iter()
687 .filter(|t| is_within_store_target(t))
688 .collect()
689}
690
/// True if a canonical target is a safe store-relative key.
///
/// The target is accepted only when *every* path component is a plain
/// `Normal` name. A `..` (`ParentDir`) component is an escape and is rejected,
/// mirroring validate's safe-path guard; a root, platform prefix, or leading
/// `.` component is rejected as well. The canonical form has normally already
/// stripped any leading `./` or `/`, so in practice `..` is the case that
/// matters. An empty target has no components and is therefore accepted.
fn is_within_store_target(target: &str) -> bool {
    use std::path::Component;

    !Path::new(target)
        .components()
        .any(|part| !matches!(part, Component::Normal(_)))
}
700
701/// Resolve the store root + a store-relative path to the absolute on-disk file,
702/// trying the path as written and then with a `.md` extension. `None` if neither
703/// exists **or if the target resolves outside the store root** — a `..`-laden or
704/// symlink-escaping wiki-link must never turn a graph read/traversal into a read
705/// of an arbitrary file outside the store (the `dbmd graph neighborhood`
706/// disclosure vector). Containment is enforced via the shared
707/// [`ensure_path_within_store`] gate, matching validate's safe-path guard.
708fn resolve_existing(store: &Store, store_relative: &Path) -> Option<PathBuf> {
709 let direct = store.root.join(store_relative);
710 if direct.is_file() && resolves_within_store(store, store_relative, &direct) {
711 return Some(direct);
712 }
713 let normalized = normalize_target(store_relative);
714 let with_md = store.root.join(format!("{normalized}.md"));
715 if with_md.is_file() && resolves_within_store(store, Path::new(&normalized), &with_md) {
716 return Some(with_md);
717 }
718 None
719}
720
721/// Containment check for a candidate on-disk path, with a cheap fast path. A
722/// store-relative path made of only `Normal` components (no `..`, no absolute /
723/// platform prefix) is trivially inside the root, so the common case avoids the
724/// `canonicalize` syscalls entirely. Anything with a `..`/absolute/prefix
725/// component falls through to the authoritative [`ensure_path_within_store`]
726/// gate (symlink-resolving), which is the only thing that can prove an escaping
727/// or symlink-redirected path actually stays inside the store.
728fn resolves_within_store(store: &Store, store_relative: &Path, abs: &Path) -> bool {
729 let plain_relative = !store_relative.is_absolute()
730 && store_relative
731 .components()
732 .all(|c| matches!(c, std::path::Component::Normal(_)));
733 if plain_relative {
734 return true;
735 }
736 ensure_path_within_store(&store.root, abs).is_ok()
737}
738
739/// Convert an absolute path under the store root into its store-relative form.
740fn rel_path(store: &Store, abs: &Path) -> Option<PathBuf> {
741 abs.strip_prefix(&store.root).ok().map(|p| p.to_path_buf())
742}
743
744/// Which layer a store-relative path sits in, by its first component.
745fn path_layer(rel: &Path) -> Option<Layer> {
746 let first = rel.components().next()?;
747 match first.as_os_str().to_str()? {
748 "sources" => Some(Layer::Sources),
749 "records" => Some(Layer::Records),
750 _ => None,
751 }
752}
753
754/// True if a store-relative path is a *content* file: under `sources/`,
755/// `records/`, or `wiki/`, a `.md` file, and not an `index.md`. Meta files
756/// (`DB.md`, `log.md`, `log/…`, sidecars) are excluded.
757fn is_content_rel(rel: &Path) -> bool {
758 if path_layer(rel).is_none() {
759 return false;
760 }
761 match rel.extension().and_then(|e| e.to_str()) {
762 Some("md") => {}
763 _ => return false,
764 }
765 rel.file_name().and_then(|n| n.to_str()) != Some("index.md")
766}
767
768/// Walk every content `.md` file in the store via the **`ignore`** walker
769/// (the ripgrep directory engine). Only the three layer roots
770/// (`sources/`/`records/`/`wiki/`) are descended, so `DB.md`, `log.md`, and
771/// `log/` at the store root are structurally never reached; hidden dirs and
772/// per-folder `index.md` sidecars are filtered out ([`is_content_rel`]). Honors
773/// `.gitignore` the way `rg` does. Returns absolute paths. SWEEP-class.
774fn walk_content_files(store: &Store) -> Result<Vec<PathBuf>, StoreError> {
775 let mut out = Vec::new();
776 for layer in Layer::all() {
777 let dir = store.root.join(layer_dir_name(layer));
778 if !dir.is_dir() {
779 continue;
780 }
781 let walker = WalkBuilder::new(&dir)
782 .hidden(true)
783 .git_ignore(true)
784 .git_global(false)
785 .require_git(false)
786 // Follow symlinks so a symlinked `.md` content file or a symlinked
787 // type folder is walked like any other content (consistent with the
788 // store SWEEP walker), rather than silently vanishing from orphans.
789 .follow_links(true)
790 .build();
791 for result in walker {
792 let entry = result.map_err(|e| StoreError::Search {
793 root: store.root.clone(),
794 message: format!("walk failed: {e}"),
795 })?;
796 // A followed symlink entry reports its own type as `is_symlink()`, so
797 // also accept a symlink whose target is a regular file.
798 let is_file = match entry.file_type() {
799 Some(ft) if ft.is_file() => true,
800 Some(ft) if ft.is_symlink() => std::fs::metadata(entry.path())
801 .map(|m| m.is_file())
802 .unwrap_or(false),
803 _ => false,
804 };
805 if !is_file {
806 continue;
807 }
808 let abs = entry.into_path();
809 if let Some(rel) = rel_path(store, &abs) {
810 if is_content_rel(&rel) {
811 out.push(abs);
812 }
813 }
814 }
815 }
816 Ok(out)
817}
818
819/// The on-disk folder name for a layer. Mirrors `Layer::dir_name`; kept local
820/// so the graph module owns its own copy rather than coupling to that body.
821fn layer_dir_name(layer: Layer) -> &'static str {
822 match layer {
823 Layer::Sources => "sources",
824 Layer::Records => "records",
825 }
826}
827
828/// Read a reached node's `summary` and `type` from its frontmatter. A missing
829/// file, missing frontmatter, or unparseable YAML degrades to an empty summary
830/// / unknown type rather than failing the whole hydration — `neighborhood` is
831/// best-effort context assembly, not validation.
832fn read_summary_and_type(store: &Store, rel: &Path) -> (String, Option<String>) {
833 let abs = match resolve_existing(store, rel) {
834 Some(a) => a,
835 None => return (String::new(), None),
836 };
837 let text = match std::fs::read_to_string(&abs) {
838 Ok(t) => t,
839 Err(_) => return (String::new(), None),
840 };
841 let yaml = match frontmatter_block(&text) {
842 Some(y) => y,
843 None => return (String::new(), None),
844 };
845 let value: serde_norway::Value = match serde_norway::from_str(yaml) {
846 Ok(v) => v,
847 Err(_) => return (String::new(), None),
848 };
849 let summary = value
850 .get("summary")
851 .and_then(|v| v.as_str())
852 .unwrap_or("")
853 .to_string();
854 let type_ = value
855 .get("type")
856 .and_then(|v| v.as_str())
857 .map(|s| s.to_string());
858 (summary, type_)
859}
860
/// Slice out the YAML that sits between the opening and closing `---` fences
/// (both exclusive).
///
/// A single leading UTF-8 BOM is tolerated, matching
/// parser/store/index/validate. The text must then begin with `---` followed
/// immediately by `\n` or `\r\n`; the closing fence is the first subsequent
/// line that is exactly `---` once trailing `\r`/`\n` are removed (a final
/// `---` without a newline counts). The returned slice keeps each YAML line's
/// own terminator.
///
/// Returns `None` when there is no leading fence or the block is never closed.
/// Local mirror of the parser's split so the graph module stays self-contained.
fn frontmatter_block(text: &str) -> Option<&str> {
    let unmarked = text.strip_prefix('\u{feff}').unwrap_or(text);
    let after_open = match unmarked.strip_prefix("---\n") {
        Some(tail) => tail,
        None => unmarked.strip_prefix("---\r\n")?,
    };

    // Byte offset (into `after_open`) of the line currently being examined.
    let mut offset = 0usize;
    after_open
        .split_inclusive('\n')
        .find_map(|raw| {
            if raw.trim_end_matches(['\r', '\n']) == "---" {
                Some(offset)
            } else {
                offset += raw.len();
                None
            }
        })
        .map(|end| &after_open[..end])
}
881
882#[cfg(test)]
883mod tests {
884 use super::*;
885 use std::fs;
886 use tempfile::TempDir;
887
888 use crate::parser::Config;
889
890 // ── Fixture builder ─────────────────────────────────────────────────────
891 //
892 // A real on-disk store in a tempdir. We write actual files (frontmatter +
893 // wiki-links) and exercise the real code paths. The fixture constructs the
894 // `Store` by its public fields rather than `Store::open`, so the graph
895 // tests stand on their own and do not depend on any other module's
896 // behavior. Each test asserts the behavior the SPEC promises, derived from
897 // intent, never from echoing the function's own output.
898 //
    // `backlinks` (and `neighborhood` in any incoming direction) read the
    // type-folder `index.jsonl` sidecars whenever the call is scoped
    // (`--type` / `--in`): the scoped candidate set comes from the sidecars,
    // never from a whole-store content walk. The unscoped form scans the tree
    // itself and so finds linkers even when no sidecar lists them. A real db.md
    // store maintains those sidecars write-through, so a test that exercises
    // backlinks should still call [`Fixture::reindex`] after writing its files
    // to build them (the SWEEP that `dbmd index rebuild` runs) — and a scoped
    // test must. Forwardlinks/orphans read content directly and need no sidecar.
906
907 struct Fixture {
908 _tmp: TempDir,
909 store: Store,
910 }
911
912 impl Fixture {
913 fn new() -> Self {
914 let tmp = TempDir::new().expect("tempdir");
915 let root = tmp.path().to_path_buf();
916 fs::write(root.join("DB.md"), "---\ntype: db-md\n---\n# store\n").expect("DB.md");
917 let store = Store {
918 root,
919 config: Config::default(),
920 };
921 Fixture { _tmp: tmp, store }
922 }
923
924 /// Write a content file at a store-relative path with the given type,
925 /// summary, and body. Creates parent dirs.
926 fn write(&self, rel: &str, type_: &str, summary: &str, body: &str) {
927 let abs = self.store.root.join(rel);
928 fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
929 let contents = format!(
930 "---\ntype: {type_}\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: {summary}\n---\n{body}\n"
931 );
932 fs::write(&abs, contents).expect("write file");
933 }
934
935 /// Write a raw file verbatim (for frontmatter-shape edge cases).
936 fn write_raw(&self, rel: &str, contents: &str) {
937 let abs = self.store.root.join(rel);
938 fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
939 fs::write(&abs, contents).expect("write raw");
940 }
941
942 /// Build the type-folder `index.jsonl` sidecars from the content written
943 /// so far — the state a real store is always in (write-through), and the
944 /// candidate set `backlinks` reads. Call after writing files in any test
945 /// that exercises `backlinks` or an incoming-direction `neighborhood`.
946 fn reindex(&self) {
947 crate::index::Index::rebuild_all(&self.store).expect("rebuild sidecars");
948 }
949
950 fn p(&self, rel: &str) -> PathBuf {
951 PathBuf::from(rel)
952 }
953 }
954
955 fn paths(v: &[PathBuf]) -> Vec<String> {
956 v.iter()
957 .map(|p| p.to_string_lossy().replace('\\', "/"))
958 .collect()
959 }
960
961 // ── normalize_target ────────────────────────────────────────────────────
962
963 #[test]
964 fn normalize_strips_md_and_leading_dotslash() {
965 assert_eq!(
966 normalize_target(Path::new("records/contacts/sarah.md")),
967 "records/contacts/sarah"
968 );
969 assert_eq!(
970 normalize_target(Path::new("./wiki/people/elena")),
971 "wiki/people/elena"
972 );
973 assert_eq!(normalize_target(Path::new("/records/x")), "records/x");
974 // Bare and `.md` forms must collapse to the same key, or edges won't unify.
975 assert_eq!(
976 normalize_target(Path::new("a/b")),
977 normalize_target(Path::new("a/b.md"))
978 );
979 }
980
981 // ── extract_link_targets (forwardlinks core) ────────────────────────────
982
983 #[test]
984 fn extract_handles_display_text_and_md_suffix() {
985 let body = "See [[wiki/people/sarah-chen|Sarah]] and [[records/contacts/elena.md]].";
986 let got = extract_link_targets(body);
987 assert_eq!(
988 got,
989 vec!["wiki/people/sarah-chen", "records/contacts/elena"]
990 );
991 }
992
993 #[test]
994 fn extract_ignores_external_markdown_links() {
995 // Standard markdown links are NOT wiki-links and must not be extracted
996 // (SPEC: external refs don't participate in the graph).
997 let body = "[Acme](https://acme.io) but [[records/companies/acme]] is internal.";
998 let got = extract_link_targets(body);
999 assert_eq!(got, vec!["records/companies/acme"]);
1000 }
1001
1002 #[test]
1003 fn extract_display_text_is_not_treated_as_a_target() {
1004 // A `|display` segment that looks path-like must not become a target;
1005 // only the part before `|` is the link target.
1006 let body = "[[records/contacts/sarah|sources/emails/decoy]]";
1007 let got = extract_link_targets(body);
1008 assert_eq!(got, vec!["records/contacts/sarah"]);
1009 }
1010
1011 // ── rewrite_links_to (write-side twin of backlinks) ─────────────────────
1012
1013 #[test]
1014 fn rewrite_plain_link_to_canonical_new_target() {
1015 let got = rewrite_links_to(
1016 "See [[records/contacts/sarah-chen]] today.",
1017 Path::new("records/contacts/sarah-chen"),
1018 Path::new("records/contacts/sarah-chen-acme"),
1019 );
1020 assert_eq!(got, "See [[records/contacts/sarah-chen-acme]] today.");
1021 }
1022
1023 #[test]
1024 fn rewrite_preserves_display_override() {
1025 let got = rewrite_links_to(
1026 "With [[records/contacts/sarah-chen|Sarah]].",
1027 Path::new("records/contacts/sarah-chen"),
1028 Path::new("records/contacts/sarah-chen-acme"),
1029 );
1030 assert_eq!(got, "With [[records/contacts/sarah-chen-acme|Sarah]].");
1031 }
1032
1033 #[test]
1034 fn rewrite_matches_md_suffixed_old_and_emits_bare_new() {
1035 // The `.md` spelling of the old target must match (it normalizes to the
1036 // same key the read side uses), and the new target is emitted bare —
1037 // the writer doctrine validate enforces (`WIKI_LINK_HAS_EXTENSION`).
1038 let got = rewrite_links_to(
1039 "[[records/contacts/sarah-chen.md]]",
1040 Path::new("records/contacts/sarah-chen"),
1041 Path::new("records/contacts/new.md"),
1042 );
1043 assert_eq!(got, "[[records/contacts/new]]");
1044 }
1045
1046 #[test]
1047 fn rewrite_leaves_prefix_collisions_and_short_form_untouched() {
1048 // Boundary correctness, anchored to the SAME normalize_target the read
1049 // side keys on: `records/contacts/sarah-chen` must NOT match the longer
1050 // `[[…-jr]]`, the short-form `[[sarah-chen]]`, or an unrelated target.
1051 let input = "[[records/contacts/sarah-chen-jr]] [[sarah-chen]] [[wiki/topics/x]]";
1052 let got = rewrite_links_to(
1053 input,
1054 Path::new("records/contacts/sarah-chen"),
1055 Path::new("records/contacts/new"),
1056 );
1057 assert_eq!(got, input, "no genuine edge to the seed → text unchanged");
1058 }
1059
1060 #[test]
1061 fn rewrite_handles_multiple_occurrences_and_mixed_spellings() {
1062 let got = rewrite_links_to(
1063 "[[records/x]] then [[./records/x]] and [[records/x.md|d]] end",
1064 Path::new("records/x"),
1065 Path::new("records/y"),
1066 );
1067 // All three spellings of the same target retarget; the display survives.
1068 assert_eq!(
1069 got,
1070 "[[records/y]] then [[records/y]] and [[records/y|d]] end"
1071 );
1072 }
1073
1074 #[test]
1075 fn rewrite_retargets_exactly_the_edges_the_core_parser_sees() {
1076 // The load-bearing property of moving the rewrite into core: the write
1077 // side must operate on EXACTLY the edge set the read side recognizes —
1078 // the same `extract_link_targets` / `normalize_target` grammar that
1079 // `forwardlinks` is built on. Anchor the test to that grammar (via
1080 // `forwardlinks` on a real file) rather than re-listing literals, so a
1081 // future divergence between the read parser and the write rewrite fails
1082 // here. (Coupled to `forwardlinks` — the single-file edge extractor —
1083 // not the multi-file `backlinks` traversal, so it tests the grammar, not
1084 // the walk.)
1085 let fx = Fixture::new();
1086 let body = "Met [[records/contacts/sarah.md|Sarah]] and not [[records/contacts/sarah-2]].";
1087 fx.write("wiki/people/bio.md", "wiki-page", "bio", body);
1088
1089 // Read side: the parser sees two outgoing edges, both in canonical bare
1090 // form (the `.md` spelling collapsed). `sarah` is a real edge here.
1091 let edges = forwardlinks(&fx.store, &fx.p("wiki/people/bio.md")).unwrap();
1092 assert_eq!(
1093 paths(&edges),
1094 vec!["records/contacts/sarah", "records/contacts/sarah-2"],
1095 "fixture must contain exactly the two edges this test reasons about"
1096 );
1097
1098 // Write side: rewriting `sarah → sarah-chen` must retarget the edge the
1099 // parser recognized (matching the `.md` spelling), preserve the display,
1100 // and leave the unrelated `sarah-2` edge untouched.
1101 let got = rewrite_links_to(
1102 body,
1103 Path::new("records/contacts/sarah"),
1104 Path::new("records/contacts/sarah-chen"),
1105 );
1106 assert_eq!(
1107 got,
1108 "Met [[records/contacts/sarah-chen|Sarah]] and not [[records/contacts/sarah-2]]."
1109 );
1110
1111 // Cross-check through the parser: the rewritten text's edge set is the
1112 // original with `sarah` swapped for `sarah-chen` — proving the rewrite
1113 // moved exactly one edge, the one the read side keyed on.
1114 fx.write("wiki/people/bio.md", "wiki-page", "bio", &got);
1115 let after = forwardlinks(&fx.store, &fx.p("wiki/people/bio.md")).unwrap();
1116 assert_eq!(
1117 paths(&after),
1118 vec!["records/contacts/sarah-2", "records/contacts/sarah-chen"],
1119 "after rewrite the parser must see the new target and not the old"
1120 );
1121 }
1122
1123 #[test]
1124 fn rewrite_empty_old_target_is_a_no_op() {
1125 // A degenerate `old` (normalizes to empty) must never rewrite anything,
1126 // mirroring backlinks' empty-target guard.
1127 let input = "[[records/x]] [[]] text";
1128 let got = rewrite_links_to(input, Path::new(""), Path::new("records/y"));
1129 assert_eq!(got, input);
1130 }
1131
1132 #[test]
1133 fn rewrite_no_match_returns_input_unchanged() {
1134 let input = "no links, [external](https://x), and [[wiki/topics/y]]";
1135 let got = rewrite_links_to(input, Path::new("records/x"), Path::new("records/z"));
1136 assert_eq!(got, input);
1137 }
1138
1139 #[test]
1140 fn rewrite_does_not_corrupt_links_in_nested_or_long_run_fences() {
1141 // Regression for the naive `starts_with("```")/("~~~")` toggle in the
1142 // rewriter: a fenced example documenting wiki-link syntax must be copied
1143 // VERBATIM, never retargeted — matching validate's edge notion. The
1144 // standard nested-fence convention (a ````-run block wrapping a ```
1145 // example) used to flip the bool mid-block, so the example link was
1146 // rewritten (silent documentation corruption).
1147 let body = "\
1148Here is how to write a link:
1149
1150````
1151```
1152[[records/contacts/bob]]
1153```
1154still fenced [[records/contacts/bob]]
1155````
1156
1157Real link: [[records/contacts/bob]].
1158";
1159 let got = rewrite_links_to(
1160 body,
1161 Path::new("records/contacts/bob"),
1162 Path::new("records/contacts/robert"),
1163 );
1164 // The two fenced examples are untouched; only the real link retargets.
1165 let expected = "\
1166Here is how to write a link:
1167
1168````
1169```
1170[[records/contacts/bob]]
1171```
1172still fenced [[records/contacts/bob]]
1173````
1174
1175Real link: [[records/contacts/robert]].
1176";
1177 assert_eq!(
1178 got, expected,
1179 "fenced example links must survive a rename verbatim; only live edges retarget"
1180 );
1181 }
1182
1183 // ── forwardlinks ─────────────────────────────────────────────────────────
1184
1185 #[test]
1186 fn forwardlinks_returns_sorted_deduped_targets_excluding_self() {
1187 let fx = Fixture::new();
1188 fx.write(
1189 "wiki/projects/renewal.md",
1190 "wiki-page",
1191 "Renewal project",
1192 "Links: [[records/contacts/sarah]] [[records/companies/acme]] [[records/contacts/sarah]] and itself [[wiki/projects/renewal]].",
1193 );
1194 // The targets need not exist on disk for forwardlinks (it reads the one
1195 // file only). Self-links are dropped; duplicates collapse; sorted asc.
1196 let got = forwardlinks(&fx.store, &fx.p("wiki/projects/renewal.md")).unwrap();
1197 assert_eq!(
1198 paths(&got),
1199 vec!["records/companies/acme", "records/contacts/sarah"]
1200 );
1201 }
1202
1203 #[test]
1204 fn forwardlinks_picks_up_wiki_links_in_frontmatter() {
1205 // SPEC: wiki-links appear in scalar + block-sequence frontmatter fields,
1206 // not just the body. forwardlinks must follow those edges too.
1207 let fx = Fixture::new();
1208 fx.write_raw(
1209 "records/meetings/m1.md",
1210 "---\ntype: meeting\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Renewal sync\ncompany: [[records/companies/acme]]\nattendees:\n - [[records/contacts/sarah]]\n - [[records/contacts/elena]]\n---\nNotes about [[wiki/projects/renewal]].\n",
1211 );
1212 let got = forwardlinks(&fx.store, &fx.p("records/meetings/m1.md")).unwrap();
1213 assert_eq!(
1214 paths(&got),
1215 vec![
1216 "records/companies/acme",
1217 "records/contacts/elena",
1218 "records/contacts/sarah",
1219 "wiki/projects/renewal",
1220 ]
1221 );
1222 }
1223
1224 #[test]
1225 fn forwardlinks_missing_file_is_empty_not_error() {
1226 let fx = Fixture::new();
1227 let got = forwardlinks(&fx.store, &fx.p("wiki/people/ghost.md")).unwrap();
1228 assert!(got.is_empty());
1229 }
1230
1231 #[test]
1232 fn forwardlinks_resolves_seed_given_without_md_extension() {
1233 let fx = Fixture::new();
1234 fx.write(
1235 "wiki/people/sarah.md",
1236 "wiki-page",
1237 "Sarah bio",
1238 "Works at [[records/companies/acme]].",
1239 );
1240 // Seed passed in bare wiki-link form (no `.md`) must still resolve.
1241 let got = forwardlinks(&fx.store, &fx.p("wiki/people/sarah")).unwrap();
1242 assert_eq!(paths(&got), vec!["records/companies/acme"]);
1243 }
1244
1245 // ── backlinks ──────────────────────────────────────────────────────────
1246
1247 #[test]
1248 fn backlinks_finds_incoming_across_layers_and_link_forms() {
1249 let fx = Fixture::new();
1250 // Target.
1251 fx.write("records/contacts/sarah.md", "contact", "Sarah Chen", "");
1252 // Three different incoming-link spellings, all to the same target.
1253 fx.write(
1254 "records/profiles/sarah.md",
1255 "profile",
1256 "bio",
1257 "See [[records/contacts/sarah]].",
1258 );
1259 fx.write(
1260 "records/meetings/m1.md",
1261 "meeting",
1262 "Renewal call",
1263 "Attendee [[records/contacts/sarah|Sarah]].",
1264 );
1265 fx.write(
1266 "sources/emails/e1.md",
1267 "email",
1268 "Hi",
1269 "From [[records/contacts/sarah.md]] today.",
1270 );
1271 // A file that links to a DIFFERENT contact must not be a backlink.
1272 fx.write(
1273 "records/profiles/other.md",
1274 "profile",
1275 "x",
1276 "[[records/contacts/sarah-2]]",
1277 );
1278 fx.reindex();
1279
1280 // All three link forms ([[x]], [[x|d]], [[x.md]]) resolve to the same
1281 // target and are found; the linkers are returned in canonical bare form.
1282 let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1283 assert_eq!(
1284 paths(&got),
1285 vec![
1286 "records/meetings/m1",
1287 "records/profiles/sarah",
1288 "sources/emails/e1",
1289 ]
1290 );
1291 }
1292
1293 #[test]
1294 fn backlinks_and_forwardlinks_round_trip_on_same_key() {
1295 // If A forwardlinks to B, then B backlinks to A — both expressed in the
1296 // identical bare key, so neighborhood can dedup across directions.
1297 let fx = Fixture::new();
1298 fx.write(
1299 "records/profiles/a.md",
1300 "profile",
1301 "A",
1302 "Knows [[records/profiles/b]].",
1303 );
1304 fx.write("records/profiles/b.md", "profile", "B", "");
1305 fx.reindex();
1306 let fwd = forwardlinks(&fx.store, &fx.p("records/profiles/a.md")).unwrap();
1307 let back = backlinks(&fx.store, &fx.p("records/profiles/b.md")).unwrap();
1308 assert_eq!(paths(&fwd), vec!["records/profiles/b"]);
1309 assert_eq!(paths(&back), vec!["records/profiles/a"]);
1310 }
1311
1312 #[test]
1313 fn backlinks_does_not_match_path_prefix_collisions() {
1314 let fx = Fixture::new();
1315 fx.write("records/contacts/sam.md", "contact", "Sam", "");
1316 // `sam-smith` shares the `sam` prefix; must NOT count as a backlink to `sam`.
1317 fx.write(
1318 "records/profiles/x.md",
1319 "profile",
1320 "x",
1321 "[[records/contacts/sam-smith]]",
1322 );
1323 // The genuine backlink.
1324 fx.write(
1325 "records/profiles/y.md",
1326 "profile",
1327 "y",
1328 "[[records/contacts/sam]]",
1329 );
1330 fx.reindex();
1331
1332 let got = backlinks(&fx.store, &fx.p("records/contacts/sam")).unwrap();
1333 assert_eq!(paths(&got), vec!["records/profiles/y"]);
1334 }
1335
1336 #[test]
1337 fn backlinks_excludes_self_reference() {
1338 let fx = Fixture::new();
1339 // A page that links to itself is not its own backlink.
1340 fx.write(
1341 "wiki/synthesis/overview.md",
1342 "wiki-page",
1343 "Overview",
1344 "This page [[wiki/synthesis/overview]] references itself.",
1345 );
1346 fx.reindex();
1347 let got = backlinks(&fx.store, &fx.p("wiki/synthesis/overview.md")).unwrap();
1348 assert!(
1349 got.is_empty(),
1350 "self-link must not appear as a backlink, got {got:?}"
1351 );
1352 }
1353
1354 #[test]
1355 fn backlinks_empty_when_nobody_links() {
1356 let fx = Fixture::new();
1357 fx.write("records/contacts/lonely.md", "contact", "Lonely", "");
1358 fx.write(
1359 "wiki/people/unrelated.md",
1360 "wiki-page",
1361 "x",
1362 "[[records/companies/acme]]",
1363 );
1364 fx.reindex();
1365 let got = backlinks(&fx.store, &fx.p("records/contacts/lonely.md")).unwrap();
1366 assert!(got.is_empty());
1367 }
1368
1369 #[test]
1370 fn backlinks_ignores_index_and_meta_files() {
1371 let fx = Fixture::new();
1372 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1373 // An index.md that lists the target must NOT be reported as a backlink
1374 // (indexes are catalog, not relationship edges).
1375 fx.write_raw(
1376 "records/contacts/index.md",
1377 "---\ntype: index\nscope: folder\nfolder: records/contacts\n---\n- [[records/contacts/sarah]] — Sarah\n",
1378 );
1379 fx.reindex();
1380 let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1381 assert!(got.is_empty(), "index.md must be excluded, got {got:?}");
1382 }
1383
1384 #[test]
1385 fn backlinks_finds_body_only_edge_not_in_frontmatter_links_field() {
1386 // REGRESSION: the sidecar's `links` field carries only the file's
1387 // frontmatter `links:` list; it does NOT include wiki-links written in
1388 // the body or in other typed frontmatter fields. Answering backlinks
1389 // from `links[]` alone would silently miss this edge. The candidate set
1390 // is sidecar-bounded, but each candidate's edge is confirmed by parsing
1391 // the file (the same extraction forwardlinks uses), so a body-only link
1392 // must still register as a backlink.
1393 let fx = Fixture::new();
1394 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1395 // `meeting.md` links to sarah ONLY in its body — its frontmatter has no
1396 // `links:` field at all, so the sidecar record's `links` is empty.
1397 fx.write(
1398 "records/meetings/standup.md",
1399 "meeting",
1400 "Standup",
1401 "Discussed renewal with [[records/contacts/sarah]].",
1402 );
1403 fx.reindex();
1404
1405 // Guard the premise: the sidecar record really does carry an empty
1406 // `links` (so this test fails loudly if the index ever starts extracting
1407 // body links — at which point the backlink predicate could be revisited).
1408 let rec = fx
1409 .store
1410 .find_by_type("meeting")
1411 .unwrap()
1412 .into_iter()
1413 .find(|r| r.path == fx.p("records/meetings/standup.md"))
1414 .expect("meeting is catalogued in its sidecar");
1415 assert!(
1416 rec.links.is_empty(),
1417 "premise: the body link is NOT projected into the sidecar `links` field; got {:?}",
1418 rec.links
1419 );
1420
1421 // Yet backlinks still finds it — because it confirms via the file parse,
1422 // not via the sidecar `links` field.
1423 let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1424 assert_eq!(
1425 paths(&got),
1426 vec!["records/meetings/standup"],
1427 "a body-only wiki-link must register as a backlink"
1428 );
1429 }
1430
1431 #[test]
1432 fn backlinks_finds_edge_in_typed_frontmatter_field() {
1433 // A wiki-link inside a *typed* frontmatter field (`company:`) is a real
1434 // edge forwardlinks follows, so backlinks must find it too — even though
1435 // the sidecar's `links` field (the `links:` key only) does not list it.
1436 let fx = Fixture::new();
1437 fx.write("records/companies/acme.md", "company", "Acme", "");
1438 fx.write_raw(
1439 "records/contacts/sarah.md",
1440 "---\ntype: contact\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Sarah\ncompany: [[records/companies/acme]]\n---\nBody with no links.\n",
1441 );
1442 fx.reindex();
1443 let got = backlinks(&fx.store, &fx.p("records/companies/acme.md")).unwrap();
1444 assert_eq!(
1445 paths(&got),
1446 vec!["records/contacts/sarah"],
1447 "a wiki-link in a typed frontmatter field is an incoming edge"
1448 );
1449 }
1450
1451 #[test]
1452 fn backlinks_unscoped_scans_the_tree_not_only_the_sidecar() {
1453 // REGRESSION (loop budget): an UNSCOPED `backlinks` must resolve incoming
1454 // edges with a SINGLE embedded-ripgrep pass over the tree
1455 // (`Store::find_links_to`), NOT by reading the sidecar candidate set and
1456 // then `read_to_string`-confirming each candidate (which re-opens every
1457 // content file → O(store); the documented >3x budget miss). A ripgrep
1458 // pass is the same scan engine `validate`/`rename`/`dbmd links` ride, and
1459 // the tree — not the sidecar — is its ground truth: a linker that is on
1460 // disk but absent from every sidecar (stale / never-built index) is still
1461 // found. We assert that behaviorally, which fails loudly if the unscoped
1462 // path ever reverts to the sidecar-bounded per-candidate confirm loop
1463 // (that loop would NOT find the unindexed linker).
1464 let fx = Fixture::new();
1465 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1466 fx.write(
1467 "records/profiles/indexed.md",
1468 "profile",
1469 "Indexed",
1470 "[[records/contacts/sarah]]",
1471 );
1472 fx.reindex(); // builds sidecars for sarah + the indexed linker
1473
1474 // Now drop a NEW linker on disk WITHOUT reindexing — it is on disk but in
1475 // no sidecar.
1476 fx.write(
1477 "records/profiles/unindexed.md",
1478 "profile",
1479 "Unindexed",
1480 "[[records/contacts/sarah]]",
1481 );
1482
1483 let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1484 assert_eq!(
1485 paths(&got),
1486 vec!["records/profiles/indexed", "records/profiles/unindexed"],
1487 "unscoped backlinks ripgrep-scans the tree, so the on-disk-but-unindexed \
1488 linker is found too — not only the sidecar-catalogued one"
1489 );
1490 }
1491
1492 #[test]
1493 fn backlinks_scoped_candidates_come_from_the_sidecar_not_a_tree_walk() {
1494 // REGRESSION (scale contract): the SCOPED form (`--type` / `--in`) is the
1495 // I/O-scoped path — it enumerates candidates from the relevant type-folder
1496 // `index.jsonl` sidecars and parses only those, NOT a whole-tree walk.
1497 // That is what makes the scope an I/O scope, not just a result filter:
1498 // a linker that is on disk but ABSENT from the sidecar (stale / never-built
1499 // index) is NOT discovered by the scoped call (the sidecar bounds which
1500 // files are candidates). This is the loop-vs-walk distinction the SPEC
1501 // draws, and it is exactly the inverse of the unscoped tree scan above.
1502 let fx = Fixture::new();
1503 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1504 fx.write(
1505 "records/profiles/indexed.md",
1506 "profile",
1507 "Indexed",
1508 "[[records/contacts/sarah]]",
1509 );
1510 fx.reindex(); // builds sidecars for sarah + the indexed linker
1511
1512 // Drop a NEW profile linker on disk WITHOUT reindexing — on disk, in no
1513 // sidecar.
1514 fx.write(
1515 "records/profiles/unindexed.md",
1516 "profile",
1517 "Unindexed",
1518 "[[records/contacts/sarah]]",
1519 );
1520
1521 // Scoped to the `profile` type: the candidate set is the sidecar's, so
1522 // only the catalogued linker is found — the unindexed one is invisible.
1523 let only_profiles = vec!["profile".to_string()];
1524 let got = backlinks_filtered(
1525 &fx.store,
1526 &fx.p("records/contacts/sarah.md"),
1527 &only_profiles,
1528 None,
1529 )
1530 .unwrap();
1531 assert_eq!(
1532 paths(&got),
1533 vec!["records/profiles/indexed"],
1534 "scoped backlinks reads the sidecar candidate set; the on-disk-but-unindexed \
1535 linker is not tree-walked"
1536 );
1537 }
1538
1539 #[test]
1540 fn backlinks_filtered_type_scopes_the_candidate_set() {
1541 // `--type` narrows backlinks to linkers of that type. Two files link to
1542 // the target — one `meeting`, one `wiki-page`; filtering to `meeting`
1543 // returns only the meeting.
1544 let fx = Fixture::new();
1545 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1546 fx.write(
1547 "records/meetings/m1.md",
1548 "meeting",
1549 "Call",
1550 "[[records/contacts/sarah]]",
1551 );
1552 fx.write(
1553 "records/profiles/bio.md",
1554 "profile",
1555 "Bio",
1556 "[[records/contacts/sarah]]",
1557 );
1558 fx.reindex();
1559
1560 let only_meetings = vec!["meeting".to_string()];
1561 let got = backlinks_filtered(
1562 &fx.store,
1563 &fx.p("records/contacts/sarah.md"),
1564 &only_meetings,
1565 None,
1566 )
1567 .unwrap();
1568 assert_eq!(
1569 paths(&got),
1570 vec!["records/meetings/m1"],
1571 "--type meeting must exclude the profile linker"
1572 );
1573
1574 // Unfiltered, both come back — proving the filter (not the data) dropped one.
1575 let all = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1576 assert_eq!(
1577 paths(&all),
1578 vec!["records/meetings/m1", "records/profiles/bio"]
1579 );
1580 }
1581
1582 #[test]
1583 fn backlinks_filtered_layer_scopes_the_candidate_set() {
1584 // `--in <layer>` narrows backlinks to linkers under that layer. The two
1585 // linkers live in different layers (a sources email and a records
1586 // meeting) so the scope genuinely separates them.
1587 let fx = Fixture::new();
1588 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1589 fx.write(
1590 "records/meetings/m1.md",
1591 "meeting",
1592 "Call",
1593 "[[records/contacts/sarah]]",
1594 );
1595 fx.write(
1596 "sources/emails/intro.md",
1597 "email",
1598 "Intro",
1599 "[[records/contacts/sarah]]",
1600 );
1601 fx.reindex();
1602
1603 let got = backlinks_filtered(
1604 &fx.store,
1605 &fx.p("records/contacts/sarah.md"),
1606 &[],
1607 Some(Layer::Sources),
1608 )
1609 .unwrap();
1610 assert_eq!(
1611 paths(&got),
1612 vec!["sources/emails/intro"],
1613 "--in sources must keep only the sources-layer linker"
1614 );
1615
1616 let records_only = backlinks_filtered(
1617 &fx.store,
1618 &fx.p("records/contacts/sarah.md"),
1619 &[],
1620 Some(Layer::Records),
1621 )
1622 .unwrap();
1623 assert_eq!(paths(&records_only), vec!["records/meetings/m1"]);
1624 }
1625
1626 #[test]
1627 fn backlinks_scoped_type_spans_all_topic_folders_in_its_layer() {
1628 // REGRESSION (finding #12): a `type` can legitimately span several folders
1629 // within one layer — a `profile` is filed under its canonical
1630 // `records/profiles/` folder, but an agent may also file a profile under
1631 // another `records/<folder>/` (the type, not the folder, is authoritative).
1632 // The scoped candidate set must read the whole `records/` layer and filter
1633 // by type, NOT just the canonical-guess folder `records/profiles/`. Before
1634 // the fix, `find_by_type("profile")` read ONLY `records/profiles/index.jsonl`
1635 // whenever that sidecar existed, silently dropping every profile linker
1636 // filed under any other folder — so `backlinks --type profile` under-reported
1637 // dependents (a wrong blast-radius check) the moment a `records/profiles/`
1638 // page also existed.
1639 //
1640 // The trigger needs BOTH: a populated `records/profiles/` (so its canonical
1641 // sidecar exists) AND a profile elsewhere in the layer that links the
1642 // target. The earlier
1643 // `backlinks_scoped_candidates_come_from_the_sidecar_not_a_tree_walk` test
1644 // masks this bug precisely because its fixture has no `records/profiles/`.
1645 let fx = Fixture::new();
1646 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1647 // A profile in the CANONICAL type folder, NOT linking the target — its
1648 // only purpose is to make `records/profiles/index.jsonl` exist on disk.
1649 fx.write(
1650 "records/profiles/glossary.md",
1651 "profile",
1652 "Glossary",
1653 "No link to sarah here.",
1654 );
1655 // A profile in a NON-canonical folder that DOES link the target.
1656 fx.write(
1657 "records/people/sarah.md",
1658 "profile",
1659 "Sarah bio",
1660 "Profile of [[records/contacts/sarah]].",
1661 );
1662 fx.reindex(); // builds records/profiles/index.jsonl AND records/people/index.jsonl
1663
1664 // Scoped to `profile`: the off-canonical linker MUST be found. Pre-fix,
1665 // the candidate set was only `records/profiles/`'s sidecar, so this was empty.
1666 let scoped = backlinks_filtered(
1667 &fx.store,
1668 &fx.p("records/contacts/sarah.md"),
1669 &["profile".to_string()],
1670 None,
1671 )
1672 .unwrap();
1673 assert_eq!(
1674 paths(&scoped),
1675 vec!["records/people/sarah"],
1676 "a profile filed outside records/profiles/ must still be a scoped backlink"
1677 );
1678
1679 // Cross-check: the unscoped path (ripgrep tree scan) finds the same single
1680 // linker, proving the scoped result is now complete — not over- or
1681 // under-counting — and that the data was real all along.
1682 let unscoped = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1683 assert_eq!(
1684 paths(&unscoped),
1685 vec!["records/people/sarah"],
1686 "scoped and unscoped backlinks must agree on the edge set"
1687 );
1688 }
1689
1690 // ── neighborhood ─────────────────────────────────────────────────────────
1691
1692 #[test]
1693 fn neighborhood_hops_zero_is_empty() {
1694 let fx = Fixture::new();
1695 fx.write("wiki/people/a.md", "wiki-page", "A", "[[wiki/people/b]]");
1696 fx.write("wiki/people/b.md", "wiki-page", "B", "");
1697 let slice = neighborhood(
1698 &fx.store,
1699 &fx.p("wiki/people/a.md"),
1700 0,
1701 &[],
1702 Direction::Both,
1703 )
1704 .unwrap();
1705 assert_eq!(slice.seed, fx.p("wiki/people/a"));
1706 assert!(slice.nodes.is_empty());
1707 }
1708
1709 #[test]
1710 fn neighborhood_outgoing_one_hop_reads_summary_and_type() {
1711 let fx = Fixture::new();
1712 fx.write(
1713 "wiki/people/a.md",
1714 "wiki-page",
1715 "Person A",
1716 "Knows [[records/contacts/b]].",
1717 );
1718 fx.write("records/contacts/b.md", "contact", "Contact B summary", "");
1719 let slice = neighborhood(
1720 &fx.store,
1721 &fx.p("wiki/people/a.md"),
1722 1,
1723 &[],
1724 Direction::Outgoing,
1725 )
1726 .unwrap();
1727 assert_eq!(slice.nodes.len(), 1);
1728 let n = &slice.nodes[0];
1729 assert_eq!(n.path, fx.p("records/contacts/b"));
1730 assert_eq!(n.summary, "Contact B summary");
1731 assert_eq!(n.type_.as_deref(), Some("contact"));
1732 assert_eq!(n.hops, 1);
1733 assert_eq!(n.via, Some((fx.p("wiki/people/a"), Direction::Outgoing)));
1734 }
1735
1736 #[test]
1737 fn neighborhood_incoming_only_walks_backlinks() {
1738 let fx = Fixture::new();
1739 // a -> seed (incoming to seed). seed -> c (outgoing from seed).
1740 fx.write(
1741 "records/profiles/seed.md",
1742 "profile",
1743 "Seed",
1744 "Out to [[records/profiles/c]].",
1745 );
1746 fx.write(
1747 "records/profiles/a.md",
1748 "profile",
1749 "A",
1750 "In to [[records/profiles/seed]].",
1751 );
1752 fx.write("records/profiles/c.md", "profile", "C", "");
1753 fx.reindex();
1754 let slice = neighborhood(
1755 &fx.store,
1756 &fx.p("records/profiles/seed.md"),
1757 1,
1758 &[],
1759 Direction::Incoming,
1760 )
1761 .unwrap();
1762 // Incoming direction: only `a` (which links TO seed), not `c`.
1763 assert_eq!(
1764 paths(
1765 &slice
1766 .nodes
1767 .iter()
1768 .map(|n| n.path.clone())
1769 .collect::<Vec<_>>()
1770 ),
1771 vec!["records/profiles/a"]
1772 );
1773 assert_eq!(
1774 slice.nodes[0].via,
1775 Some((fx.p("records/profiles/seed"), Direction::Incoming))
1776 );
1777 }
1778
1779 #[test]
1780 fn neighborhood_bounded_bfs_respects_hop_limit_and_min_distance() {
1781 let fx = Fixture::new();
1782 // Chain a -> b -> c -> d, all outgoing.
1783 fx.write("wiki/c/a.md", "wiki-page", "A", "[[wiki/c/b]]");
1784 fx.write("wiki/c/b.md", "wiki-page", "B", "[[wiki/c/c]]");
1785 fx.write("wiki/c/c.md", "wiki-page", "C", "[[wiki/c/d]]");
1786 fx.write("wiki/c/d.md", "wiki-page", "D", "");
1787 let slice =
1788 neighborhood(&fx.store, &fx.p("wiki/c/a.md"), 2, &[], Direction::Outgoing).unwrap();
1789 // 2 hops reaches b (1) and c (2), not d (3).
1790 let by_path: HashMap<String, u32> = slice
1791 .nodes
1792 .iter()
1793 .map(|n| (n.path.to_string_lossy().to_string(), n.hops))
1794 .collect();
1795 assert_eq!(by_path.get("wiki/c/b").copied(), Some(1));
1796 assert_eq!(by_path.get("wiki/c/c").copied(), Some(2));
1797 assert_eq!(by_path.get("wiki/c/d"), None);
1798 assert_eq!(slice.nodes.len(), 2);
1799 }
1800
1801 #[test]
1802 fn neighborhood_records_min_hops_on_diamond() {
1803 let fx = Fixture::new();
1804 // Diamond: a -> b, a -> c, b -> d, c -> d. d is reachable at hop 2 from
1805 // either branch; it must be recorded once, at hop 2.
1806 fx.write("wiki/d/a.md", "wiki-page", "A", "[[wiki/d/b]] [[wiki/d/c]]");
1807 fx.write("wiki/d/b.md", "wiki-page", "B", "[[wiki/d/d]]");
1808 fx.write("wiki/d/c.md", "wiki-page", "C", "[[wiki/d/d]]");
1809 fx.write("wiki/d/d.md", "wiki-page", "D", "");
1810 let slice =
1811 neighborhood(&fx.store, &fx.p("wiki/d/a.md"), 3, &[], Direction::Outgoing).unwrap();
1812 let d_nodes: Vec<&ContextNode> = slice
1813 .nodes
1814 .iter()
1815 .filter(|n| n.path == fx.p("wiki/d/d"))
1816 .collect();
1817 assert_eq!(d_nodes.len(), 1, "d must appear exactly once");
1818 assert_eq!(d_nodes[0].hops, 2, "d's min distance from a is 2");
1819 // b and c at hop 1, d at hop 2 => 3 nodes total, no cycle blowup.
1820 assert_eq!(slice.nodes.len(), 3);
1821 }
1822
1823 #[test]
1824 fn neighborhood_type_filter_narrows_results_but_not_traversal() {
1825 let fx = Fixture::new();
1826 // seed -> contact -> meeting. Filtering to `meeting` must still reach
1827 // the meeting THROUGH the (excluded) contact at hop 2.
1828 fx.write(
1829 "wiki/people/seed.md",
1830 "wiki-page",
1831 "Seed",
1832 "[[records/contacts/sarah]]",
1833 );
1834 fx.write(
1835 "records/contacts/sarah.md",
1836 "contact",
1837 "Sarah",
1838 "[[records/meetings/m1]]",
1839 );
1840 fx.write("records/meetings/m1.md", "meeting", "Renewal call", "");
1841 let only_meetings = vec!["meeting".to_string()];
1842 let slice = neighborhood(
1843 &fx.store,
1844 &fx.p("wiki/people/seed.md"),
1845 2,
1846 &only_meetings,
1847 Direction::Outgoing,
1848 )
1849 .unwrap();
1850 // Only the meeting is returned; the contact is traversed but filtered out.
1851 assert_eq!(slice.nodes.len(), 1);
1852 assert_eq!(slice.nodes[0].path, fx.p("records/meetings/m1"));
1853 assert_eq!(slice.nodes[0].type_.as_deref(), Some("meeting"));
1854 assert_eq!(slice.nodes[0].hops, 2);
1855 }
1856
1857 #[test]
1858 fn neighborhood_capped_bounds_traversal_not_just_output() {
1859 // REGRESSION (finding #16): `neighborhood` expands every reached node, and
1860 // each incoming-edge expansion is a full-store scan, so the per-node cost
1861 // is O(visited × store). The CLI's `--limit` was applied post-hoc as a
1862 // `.take(n)` on the RESULT, which caps printed nodes but NOT the traversal
1863 // — the scans still fire for every reachable node. `neighborhood_capped`
1864 // bounds the traversal itself: once `max_nodes` distinct nodes are
1865 // admitted, the BFS stops discovering (and therefore stops scanning).
1866 //
1867 // Structure proving traversal — not just output — is bounded:
1868 // seed -> a, b, c (hop 1, discovered in sorted order: a, b, c)
1869 // a -> deep (hop 2, reachable ONLY by expanding `a`)
1870 // Cap at 2: admit `a` and `b`, stop before `c` and before any hop-2
1871 // expansion. `deep` is therefore unreachable. A post-hoc `.take(2)` would
1872 // have traversed the whole graph (reaching `deep`) and only then truncated
1873 // — so the absence of `deep` is observable proof the traversal stopped.
1874 let fx = Fixture::new();
1875 fx.write(
1876 "wiki/n/seed.md",
1877 "wiki-page",
1878 "Seed",
1879 "[[wiki/n/a]] [[wiki/n/b]] [[wiki/n/c]]",
1880 );
1881 fx.write("wiki/n/a.md", "wiki-page", "A", "[[wiki/n/deep]]");
1882 fx.write("wiki/n/b.md", "wiki-page", "B", "");
1883 fx.write("wiki/n/c.md", "wiki-page", "C", "");
1884 fx.write("wiki/n/deep.md", "wiki-page", "Deep", "");
1885
1886 // Uncapped over 3 hops: all four reachable nodes appear (a, b, c at hop 1,
1887 // deep at hop 2) — the full set the cap is measured against.
1888 let full = neighborhood(
1889 &fx.store,
1890 &fx.p("wiki/n/seed.md"),
1891 3,
1892 &[],
1893 Direction::Outgoing,
1894 )
1895 .unwrap();
1896 assert_eq!(
1897 paths(
1898 &full
1899 .nodes
1900 .iter()
1901 .map(|n| n.path.clone())
1902 .collect::<Vec<_>>()
1903 ),
1904 vec!["wiki/n/a", "wiki/n/b", "wiki/n/c", "wiki/n/deep"],
1905 "uncapped traversal reaches every node within the hop budget"
1906 );
1907
1908 // Capped at 2 over the SAME hop budget: exactly the first two hop-1 nodes,
1909 // and crucially NOT `deep` — the cap halted the BFS before any node was
1910 // expanded into hop 2, so the deep node was never traversed to.
1911 let capped = neighborhood_capped(
1912 &fx.store,
1913 &fx.p("wiki/n/seed.md"),
1914 3,
1915 &[],
1916 Direction::Outgoing,
1917 Some(2),
1918 )
1919 .unwrap();
1920 assert_eq!(
1921 paths(
1922 &capped
1923 .nodes
1924 .iter()
1925 .map(|n| n.path.clone())
1926 .collect::<Vec<_>>()
1927 ),
1928 vec!["wiki/n/a", "wiki/n/b"],
1929 "the cap bounds traversal: only the first 2 nodes are reached, and the \
1930 hop-2 `deep` node (reachable only by expanding a capped-out node) is \
1931 never traversed"
1932 );
1933
1934 // `max_nodes = None` is exactly the unbounded `neighborhood` behavior.
1935 let uncapped = neighborhood_capped(
1936 &fx.store,
1937 &fx.p("wiki/n/seed.md"),
1938 3,
1939 &[],
1940 Direction::Outgoing,
1941 None,
1942 )
1943 .unwrap();
1944 assert_eq!(
1945 uncapped.nodes.len(),
1946 full.nodes.len(),
1947 "None cap matches the unbounded neighborhood result"
1948 );
1949 }
1950
1951 #[test]
1952 fn neighborhood_capped_both_direction_caps_the_node_count() {
1953 // The CLI always passes `Direction::Both` (the per-node backlinks scan is
1954 // the expensive path the cap exists to bound). The cap gates discovery in
1955 // any direction, so a hub linked from many nodes is still bounded.
1956 let fx = Fixture::new();
1957 fx.write("records/profiles/hub.md", "profile", "Hub", "");
1958 for n in ["a", "b", "c", "d", "e"] {
1959 fx.write(
1960 &format!("records/profiles/{n}.md"),
1961 "profile",
1962 n,
1963 "[[records/profiles/hub]]",
1964 );
1965 }
1966 fx.reindex();
1967
1968 let capped = neighborhood_capped(
1969 &fx.store,
1970 &fx.p("records/profiles/hub.md"),
1971 1,
1972 &[],
1973 Direction::Both,
1974 Some(3),
1975 )
1976 .unwrap();
1977 assert_eq!(
1978 capped.nodes.len(),
1979 3,
1980 "Both-direction neighborhood is bounded to the node cap"
1981 );
1982
1983 // Without the cap the same call returns all five backlinking nodes,
1984 // proving the cap (not the data) limited the set.
1985 let uncapped = neighborhood(
1986 &fx.store,
1987 &fx.p("records/profiles/hub.md"),
1988 1,
1989 &[],
1990 Direction::Both,
1991 )
1992 .unwrap();
1993 assert_eq!(uncapped.nodes.len(), 5);
1994 }
1995
1996 #[test]
1997 fn neighborhood_cycle_terminates() {
1998 let fx = Fixture::new();
1999 // a <-> b cycle. Must not loop forever; each appears once.
2000 fx.write("wiki/g/a.md", "wiki-page", "A", "[[wiki/g/b]]");
2001 fx.write("wiki/g/b.md", "wiki-page", "B", "[[wiki/g/a]]");
2002 fx.reindex();
2003 let slice =
2004 neighborhood(&fx.store, &fx.p("wiki/g/a.md"), 10, &[], Direction::Both).unwrap();
2005 // From a: b is the only other node (a is the seed, excluded).
2006 assert_eq!(
2007 paths(
2008 &slice
2009 .nodes
2010 .iter()
2011 .map(|n| n.path.clone())
2012 .collect::<Vec<_>>()
2013 ),
2014 vec!["wiki/g/b"]
2015 );
2016 }
2017
2018 // ── orphans ──────────────────────────────────────────────────────────────
2019
2020 #[test]
2021 fn orphans_finds_files_with_no_edges_either_direction() {
2022 let fx = Fixture::new();
2023 // Wired pair: a links to b (a has outgoing, b has incoming).
2024 fx.write("wiki/people/a.md", "wiki-page", "A", "[[wiki/people/b]]");
2025 fx.write("wiki/people/b.md", "wiki-page", "B", "");
2026 // Orphan: no links in or out.
2027 fx.write(
2028 "sources/emails/lonely.md",
2029 "email",
2030 "Lonely email",
2031 "Just text, no links.",
2032 );
2033 let got = orphans(&fx.store, None).unwrap();
2034 assert_eq!(paths(&got), vec!["sources/emails/lonely.md"]);
2035 }
2036
2037 #[test]
2038 fn orphans_file_with_only_broken_outgoing_link_is_orphan() {
2039 let fx = Fixture::new();
2040 // Broken targets are validation issues, not graph edges to another
2041 // store file. A file whose only link points nowhere is still an orphan.
2042 fx.write(
2043 "records/profiles/a.md",
2044 "profile",
2045 "A",
2046 "[[records/contacts/ghost]]",
2047 );
2048 let got = orphans(&fx.store, None).unwrap();
2049 assert!(
2050 paths(&got).contains(&"records/profiles/a.md".to_string()),
2051 "broken outgoing links must not wire the graph: {got:?}"
2052 );
2053 }
2054
2055 #[test]
2056 fn orphans_file_with_only_incoming_is_not_orphan() {
2057 let fx = Fixture::new();
2058 // `target` has no outgoing links but IS linked to by `linker` — not an orphan.
2059 fx.write("records/contacts/target.md", "contact", "Target", "");
2060 fx.write(
2061 "records/profiles/linker.md",
2062 "profile",
2063 "Linker",
2064 "[[records/contacts/target]]",
2065 );
2066 let got = orphans(&fx.store, None).unwrap();
2067 assert!(
2068 !paths(&got).contains(&"records/contacts/target.md".to_string()),
2069 "incoming-only is not an orphan: {got:?}"
2070 );
2071 // `linker` has outgoing, so also not an orphan.
2072 assert!(!paths(&got).contains(&"records/profiles/linker.md".to_string()));
2073 }
2074
2075 #[test]
2076 fn orphans_incoming_link_from_other_layer_unorphans() {
2077 let fx = Fixture::new();
2078 // Candidate in records/, only incoming edge comes from sources/ — a
2079 // cross-layer link must still un-orphan it even when scoped to records.
2080 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
2081 fx.write(
2082 "sources/emails/sarah.md",
2083 "email",
2084 "bio",
2085 "[[records/contacts/sarah]]",
2086 );
2087 // A genuine orphan in records/ to prove the scope still returns something.
2088 fx.write("records/contacts/nemo.md", "contact", "Nemo", "");
2089 let got = orphans(&fx.store, Some(Layer::Records)).unwrap();
2090 assert_eq!(paths(&got), vec!["records/contacts/nemo.md"]);
2091 }
2092
2093 #[test]
2094 fn orphans_layer_scope_filters_candidates() {
2095 let fx = Fixture::new();
2096 // Orphans across both layers: one source, and two records (an atomic
2097 // contact + a conclusion `profile`, the former wiki-page).
2098 fx.write("sources/emails/s.md", "email", "S", "no links");
2099 fx.write("records/contacts/r.md", "contact", "R", "");
2100 fx.write("records/profiles/w.md", "profile", "W", "");
2101 // The records scope keeps only the two records-layer orphans.
2102 let only_records = orphans(&fx.store, Some(Layer::Records)).unwrap();
2103 assert_eq!(
2104 paths(&only_records),
2105 vec!["records/contacts/r.md", "records/profiles/w.md"]
2106 );
2107 let only_sources = orphans(&fx.store, Some(Layer::Sources)).unwrap();
2108 assert_eq!(paths(&only_sources), vec!["sources/emails/s.md"]);
2109 // No scope: all three, sorted (records, records, sources).
2110 let all = orphans(&fx.store, None).unwrap();
2111 assert_eq!(
2112 paths(&all),
2113 vec![
2114 "records/contacts/r.md",
2115 "records/profiles/w.md",
2116 "sources/emails/s.md",
2117 ]
2118 );
2119 }
2120
2121 #[test]
2122 fn orphans_self_link_does_not_count_as_an_edge() {
2123 let fx = Fixture::new();
2124 // A page that only links to itself has no real edges => still an orphan.
2125 fx.write(
2126 "records/synthesis/solo.md",
2127 "synthesis",
2128 "Solo",
2129 "I reference [[records/synthesis/solo]] only.",
2130 );
2131 let got = orphans(&fx.store, None).unwrap();
2132 assert_eq!(paths(&got), vec!["records/synthesis/solo.md"]);
2133 }
2134
2135 #[test]
2136 fn orphans_excludes_index_and_db_files() {
2137 let fx = Fixture::new();
2138 // A lone index.md / DB.md must never be reported as an orphan content file.
2139 fx.write_raw(
2140 "records/index.md",
2141 "---\ntype: index\nscope: layer\nfolder: records\n---\n# records\n",
2142 );
2143 fx.write(
2144 "records/profiles/real-orphan.md",
2145 "profile",
2146 "Real",
2147 "no links",
2148 );
2149 let got = orphans(&fx.store, None).unwrap();
2150 assert_eq!(paths(&got), vec!["records/profiles/real-orphan.md"]);
2151 }
2152
2153 // ── frontmatter_block helper ─────────────────────────────────────────────
2154
2155 #[test]
2156 fn frontmatter_block_extracts_between_fences() {
2157 let text = "---\ntype: contact\nsummary: hi\n---\nbody here\n";
2158 assert_eq!(
2159 frontmatter_block(text),
2160 Some("type: contact\nsummary: hi\n")
2161 );
2162 }
2163
2164 #[test]
2165 fn frontmatter_block_none_without_leading_fence() {
2166 let text = "no frontmatter here\n";
2167 assert_eq!(frontmatter_block(text), None);
2168 }
2169
2170 #[test]
2171 fn frontmatter_block_tolerates_leading_bom() {
2172 // Regression (finding #19 cross-module): a UTF-8 BOM before the opening
2173 // fence must not hide the frontmatter from the graph layer — otherwise a
2174 // BOM-prefixed file the catalog indexes contributes no backlinks/edges.
2175 // Pre-fix the `---\n` strip failed on the BOM and returned None.
2176 let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody here\n";
2177 assert_eq!(
2178 frontmatter_block(text),
2179 Some("type: contact\nsummary: hi\n"),
2180 "a leading BOM must not hide frontmatter from the graph layer"
2181 );
2182 }
2183
2184 // ── shared edge notion: whitespace / fence / case / containment ──────────
2185
2186 /// Padded `[[ x ]]` must be a forward edge AND (after reindex) a backward
2187 /// edge — the two views agreeing on the same edge in a clean store.
2188 #[test]
2189 fn padded_link_is_both_a_forward_and_backward_edge() {
2190 let fx = Fixture::new();
2191 fx.write(
2192 "records/contacts/sarah.md",
2193 "contact",
2194 "Sarah",
2195 "the contact",
2196 );
2197 fx.write(
2198 "records/profiles/a.md",
2199 "profile",
2200 "A",
2201 "See [[ records/contacts/sarah ]] today.",
2202 );
2203 fx.reindex();
2204
2205 assert_eq!(
2206 paths(&forwardlinks(&fx.store, Path::new("records/profiles/a.md")).unwrap()),
2207 vec!["records/contacts/sarah"],
2208 "padded link is a forward edge"
2209 );
2210 assert_eq!(
2211 paths(&backlinks(&fx.store, Path::new("records/contacts/sarah.md")).unwrap()),
2212 vec!["records/profiles/a"],
2213 "padded link is the SAME backward edge (forward and backward agree)"
2214 );
2215 }
2216
2217 /// A `[[...]]` only inside a fenced code block is a documentation example,
2218 /// not an edge: no forward edge, no backward edge, and the source page is an
2219 /// orphan (no real links). Matches validate's fence-aware extractor.
2220 #[test]
2221 fn fenced_link_is_not_an_edge_and_page_is_orphan() {
2222 let fx = Fixture::new();
2223 fx.write(
2224 "records/contacts/sarah.md",
2225 "contact",
2226 "Sarah",
2227 "the contact",
2228 );
2229 fx.write(
2230 "records/synthesis/howto.md",
2231 "synthesis",
2232 "Howto",
2233 "```markdown\n[[records/contacts/sarah]] is how you link.\n```",
2234 );
2235 fx.reindex();
2236
2237 assert!(
2238 forwardlinks(&fx.store, Path::new("records/synthesis/howto.md"))
2239 .unwrap()
2240 .is_empty(),
2241 "a fenced example is not a forward edge"
2242 );
2243 assert!(
2244 backlinks(&fx.store, Path::new("records/contacts/sarah.md"))
2245 .unwrap()
2246 .is_empty(),
2247 "a fenced example is not a backward edge"
2248 );
2249 let orphan_set = paths(&orphans(&fx.store, None).unwrap());
2250 assert!(
2251 orphan_set.contains(&"records/synthesis/howto.md".to_string()),
2252 "a page whose only link is fenced has no real edges => orphan: {orphan_set:?}"
2253 );
2254 }
2255
2256 /// `rename` must NOT rewrite a `[[...]]` inside a fenced code block (it is
2257 /// verbatim documentation, not an edge), while still rewriting a real link.
2258 #[test]
2259 fn rewrite_links_to_leaves_fenced_examples_untouched() {
2260 let input = "\
2261Real [[records/contacts/sarah]] link.
2262
2263```markdown
2264Example: [[records/contacts/sarah]] inside a fence.
2265```
2266
2267Trailing [[records/contacts/sarah]].
2268";
2269 let got = rewrite_links_to(
2270 input,
2271 Path::new("records/contacts/sarah"),
2272 Path::new("records/contacts/sarah-chen"),
2273 );
2274 // The two non-fenced links retarget; the fenced one is verbatim.
2275 assert!(
2276 got.contains("Real [[records/contacts/sarah-chen]] link."),
2277 "real link before the fence must retarget"
2278 );
2279 assert!(
2280 got.contains("Trailing [[records/contacts/sarah-chen]]."),
2281 "real link after the fence must retarget"
2282 );
2283 assert!(
2284 got.contains("Example: [[records/contacts/sarah]] inside a fence."),
2285 "fenced example must stay verbatim, got:\n{got}"
2286 );
2287 }
2288
2289 /// `rewrite_links_to` matches a padded link and preserves the display.
2290 #[test]
2291 fn rewrite_links_to_matches_padded_link() {
2292 let got = rewrite_links_to(
2293 "See [[ records/contacts/sarah |Sarah]] today.",
2294 Path::new("records/contacts/sarah"),
2295 Path::new("records/contacts/sarah-chen"),
2296 );
2297 assert_eq!(got, "See [[records/contacts/sarah-chen|Sarah]] today.");
2298 }
2299
2300 /// On a case-insensitive filesystem a case-variant link is the same edge:
2301 /// backlinks finds it, orphans does NOT falsely orphan the target, and
2302 /// rename rewrites it. On a case-sensitive FS the link is genuinely a
2303 /// different target, so the test is skipped.
2304 #[cfg(unix)]
2305 #[test]
2306 fn case_variant_link_is_one_edge_on_case_insensitive_fs() {
2307 // Probe the filesystem the same way the production code does
2308 // (`link_edge_key` is imported at module scope).
2309 if link_edge_key("A") != link_edge_key("a") {
2310 // case-sensitive filesystem: the case-variant link is a different
2311 // target, so this scenario doesn't apply.
2312 return;
2313 }
2314 let fx = Fixture::new();
2315 fx.write(
2316 "records/contacts/sarah-chen.md",
2317 "contact",
2318 "Sarah",
2319 "the contact",
2320 );
2321 fx.write(
2322 "records/profiles/bio.md",
2323 "profile",
2324 "Bio",
2325 "See [[records/contacts/Sarah-Chen]].",
2326 );
2327 fx.reindex();
2328
2329 assert_eq!(
2330 paths(&backlinks(&fx.store, Path::new("records/contacts/sarah-chen.md")).unwrap()),
2331 vec!["records/profiles/bio"],
2332 "case-variant incoming link must be a backward edge"
2333 );
2334 let orphan_set = paths(&orphans(&fx.store, None).unwrap());
2335 assert!(
2336 !orphan_set.contains(&"records/contacts/sarah-chen.md".to_string()),
2337 "a target with a live case-variant incoming link must NOT be orphaned: {orphan_set:?}"
2338 );
2339
2340 let rewritten = rewrite_links_to(
2341 "See [[records/contacts/Sarah-Chen]].",
2342 Path::new("records/contacts/sarah-chen"),
2343 Path::new("records/contacts/sarah"),
2344 );
2345 assert_eq!(
2346 rewritten, "See [[records/contacts/sarah]].",
2347 "rename must rewrite the case-variant link on a case-insensitive FS"
2348 );
2349 }
2350
2351 /// A `[[../outside/x]]` escaping wiki-link is never a forward edge, and a
2352 /// `neighborhood` from the escaping page never reads or traverses through the
2353 /// external file — closing the disclosure vector.
2354 #[cfg(unix)]
2355 #[test]
2356 fn escaping_link_is_not_an_edge_and_neighborhood_does_not_escape() {
2357 let fx = Fixture::new();
2358 // An external file OUTSIDE the store root, with its own in-store link.
2359 let outside_dir = fx.store.root.parent().unwrap().join("outside");
2360 fs::create_dir_all(&outside_dir).unwrap();
2361 fs::write(
2362 outside_dir.join("secret.md"),
2363 "---\ntype: note\nsummary: TOPSECRET\n---\nLinks [[records/contacts/sarah]].\n",
2364 )
2365 .unwrap();
2366 fx.write(
2367 "records/contacts/sarah.md",
2368 "contact",
2369 "Sarah",
2370 "the contact",
2371 );
2372 fx.write(
2373 "wiki/topics/traversal.md",
2374 "wiki-page",
2375 "Traversal",
2376 "See [[../outside/secret]].",
2377 );
2378 fx.reindex();
2379
2380 // The escaping target is not a forward edge.
2381 assert!(
2382 forwardlinks(&fx.store, Path::new("wiki/topics/traversal.md"))
2383 .unwrap()
2384 .is_empty(),
2385 "an escaping `[[../outside/secret]]` must not be a forward edge"
2386 );
2387
2388 // Neighborhood from the escaping page reaches nothing through the
2389 // external file (the external file is never read/traversed).
2390 let slice = neighborhood(
2391 &fx.store,
2392 Path::new("wiki/topics/traversal.md"),
2393 2,
2394 &[],
2395 Direction::Outgoing,
2396 )
2397 .unwrap();
2398 assert!(
2399 slice
2400 .nodes
2401 .iter()
2402 .all(|n| !n.path.to_string_lossy().contains("outside")),
2403 "neighborhood must not read/traverse the external file: {:?}",
2404 slice.nodes
2405 );
2406 }
2407}