dbmd_core/graph.rs
1//! `graph` — the wiki-link **relationship layer**.
2//!
3//! Wiki-links are curated-relevance edges (the LLM wrote them), so the graph's
4//! job is to **assemble the relevant context around a seed**, not to be
5//! analyzed. **All ops are on-demand — there is no maintained graph** (a
6//! persistent graph is the roadmap engine).
7//!
8//! [`backlinks`] / [`forwardlinks`] are loop ops (O(changed), never O(store)).
9//! [`neighborhood`] is the high-value context-hydration op. [`orphans`] is a
10//! SWEEP curation worklist.
11//!
12//! Whole-graph analytics (connected components, cycle detection, shortest
13//! path, sinks/sources, DOT/JSON export) are deliberately **not** here — a
14//! human studying the graph opens the store in Obsidian; broken-link detection
15//! is [`crate::validate`]'s job (`WIKI_LINK_BROKEN`).
16//!
17//! ## Implementation note — two paths for the incoming-edge scan
18//!
19//! The scale contract (SPEC § Tooling, plan: *"the interactive loop is
20//! O(changed), never O(store)"*) is the load-bearing rule here. [`backlinks`]
21//! is a loop op, so it must **not** open and `read_to_string` every content file
22//! in the store on each call. It resolves incoming edges by one of two paths,
23//! chosen by whether the call is scoped:
24//!
25//! - **Unscoped** (`dbmd graph backlinks <x>`, no `--type`/`--in`): one
26//! embedded-ripgrep pass for the literal `[[<target>]]` over the tree, via
27//! [`Store::find_links_to`] (`grep` + `ignore`, early-exit per file) — the
28//! same scan engine [`crate::validate`]'s working-set incoming-linker step
29//! uses. A single store traversal with cheap presence-only matching, not N
30//! whole-file parses; that is what keeps the unscoped call inside the loop
31//! budget. [`backlinks`] then filters the raw hits to content files and emits
32//! canonical bare targets (its relationship view), where the lower-level
33//! [`Store::find_links_to`] returns every `.md` the text appears in.
34//! - **Scoped** (`--type` / `--in`): the candidate set is enumerated from the
35//! relevant type-folder `index.jsonl` sidecars — one sequential read per
36//! type-folder (via [`crate::query::Query`], which sits on
37//! [`Store::read_type_index`]) — and each candidate is confirmed by a
38//! single-file parse. That is what makes `--type` / `--in` an *I/O* scope, not
39//! just a result filter: a typed/layer-scoped `backlinks` reads only the
40//! relevant folder(s)' sidecars and parses only those files.
41//!
42//! **Why the scoped path confirms by parsing the candidate, not by trusting the
43//! sidecar's `links` field.** A sidecar record's `links` is the file's
44//! *frontmatter* `links:` list only — it does **not** capture wiki-links written
45//! in the body or inside other typed frontmatter fields (`company: [[…]]`,
46//! `attendees: [ … ]`, `derived_from: [ … ]`). [`forwardlinks`] extracts edges
47//! from the whole file, so to keep the two directions on the **same** edge set
48//! (an incoming edge to X is exactly: some file whose [`forwardlinks`] contains
49//! X) the incoming-edge confirmation re-parses each candidate file the same way.
50//! The sidecar bounds *which* files are candidates; the parse decides whether
51//! each truly links. The unscoped ripgrep path stays on that same edge set by
52//! matching the link text wherever it lives in the file (frontmatter or body).
53//! A node's `summary` / `type` likewise read frontmatter directly (the source of
54//! truth the sidecar is derived from; never stale).
55
56use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
57use std::io;
58use std::path::{Path, PathBuf};
59
60use ignore::WalkBuilder;
61use regex::Regex;
62
63use crate::index::IndexRecord;
64use crate::query::Query;
65use crate::store::{Layer, Store, StoreError};
66
67/// Which edge directions a traversal follows.
68#[derive(Debug, Clone, Copy, PartialEq, Eq)]
69pub enum Direction {
70 /// Incoming edges only (backlinks).
71 Incoming,
72 /// Outgoing edges only (forwardlinks).
73 Outgoing,
74 /// Both directions.
75 Both,
76}
77
78/// One node reached during a [`neighborhood`] hydration: the file, its
79/// `summary`, and how it connects back toward the seed.
80#[derive(Debug, Clone, PartialEq, Eq)]
81pub struct ContextNode {
82 /// The store-relative path of the reached file.
83 pub path: PathBuf,
84 /// The file's `summary` (read from its sidecar entry / frontmatter).
85 pub summary: String,
86 /// The file's `type`, when known.
87 pub type_: Option<String>,
88 /// Hop distance from the seed (the seed itself is 0).
89 pub hops: u32,
90 /// The relationship edge that brought this node into the slice: the path it
91 /// links to/from one hop closer to the seed, and the direction.
92 pub via: Option<(PathBuf, Direction)>,
93}
94
95/// The readable working-set digest [`neighborhood`] returns: the seed plus the
96/// reached nodes with their summaries and connections. The relationship-axis
97/// "turn a seed into context" primitive.
98#[derive(Debug, Clone, PartialEq, Eq)]
99pub struct ContextSlice {
100 /// The seed the slice was hydrated from.
101 pub seed: PathBuf,
102 /// The reached nodes (excluding the seed), in BFS order.
103 pub nodes: Vec<ContextNode>,
104}
105
106/// Incoming edges to `path`: files that wiki-link to it. The blast-radius /
107/// dependents primitive before an edit. Store-wide (every layer / every type);
108/// see [`backlinks_filtered`] for the `--type` / `--in`-scoped form.
109///
110/// `path` is the store-relative target as it would be written inside a
111/// wiki-link (with or without a trailing `.md`; both resolve to the same
112/// target). Returns each linking file as its **canonical bare wiki-link path**
113/// (store-relative, no `.md`) — the same key [`forwardlinks`] emits, so the two
114/// directions round-trip and [`neighborhood`] can use one node identity.
115/// Deduped, sorted, never including the seed itself.
116pub fn backlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
117 backlinks_filtered(store, path, &[], None)
118}
119
120/// Incoming edges to `path`, scoped by the linking file's `type` and/or layer —
121/// the `dbmd graph backlinks --type/--in` surface.
122///
123/// **Scale (the loop contract).** Two paths, by whether the call is scoped:
124///
125/// - **Unscoped** (`types` empty *and* `layer` `None`): one embedded-ripgrep
126/// pass for `[[<target>]]` across the store via [`Store::find_links_to`] — a
127/// single `grep` + `ignore` traversal with early-exit per file, never a
128/// `read_to_string` of every content file. This is the same scan engine
129/// [`crate::validate::validate_working_set`]'s incoming-linker step rides, and
130/// it keeps the unscoped call inside the loop budget (the old per-candidate
131/// confirm-read re-opened every file in the store → O(store)).
132/// - **Scoped** (`types` and/or `layer` set): the candidate set — the files that
133/// *might* link to `path` — is read from the relevant type-folder
134/// `index.jsonl` sidecars, so the call touches only the named folder(s):
135/// O(folder), the sanctioned loop cost. Each candidate is then confirmed by a
136/// single-file parse. When `types` lists several types, every named type's
137/// folder is read and the candidate sets unioned; a `layer` further restricts
138/// the candidate paths to that layer.
139///
140/// **Correctness (one edge set, both paths).** An incoming edge to X is exactly:
141/// some file whose [`forwardlinks`] contains X — a wiki-link in the body or in
142/// *any* frontmatter field (`company: [[…]]`, `attendees: [ … ]`), not just the
143/// sidecar's frontmatter `links:` projection. Both paths honor that:
144/// - The unscoped scan matches the literal `[[<target>]]` text wherever it lives
145/// in a file (frontmatter or body), the same edges [`forwardlinks`] extracts.
146/// [`Store::find_links_to`] returns *every* `.md` carrying the link text
147/// (including `index.md` catalogs); [`backlinks`] is the relationship view, so
148/// the results are filtered to content files ([`is_content_rel`]) and emitted
149/// as canonical bare targets, self-excluded.
150/// - The scoped path confirms each candidate via [`file_links_to`], which
151/// delegates to [`forwardlinks`] (body + every frontmatter field) — so a
152/// body-only or typed-field edge is caught, not just the sidecar's `links:`
153/// list.
154///
155/// Result form (canonical bare paths, deduped, sorted, seed excluded) is
156/// identical on both paths and matches [`backlinks`].
157pub fn backlinks_filtered(
158 store: &Store,
159 path: &Path,
160 types: &[String],
161 layer: Option<Layer>,
162) -> Result<Vec<PathBuf>, StoreError> {
163 let target = normalize_target(path);
164 if target.is_empty() {
165 return Ok(Vec::new());
166 }
167
168 // Unscoped: one embedded-ripgrep pass over the store (O(store) scan with
169 // early-exit per file), not a per-candidate read of every content file.
170 // `find_links_to` returns every `.md` carrying the link text (incl. catalog
171 // `index.md`); narrow to content files and canonicalize to the bare target
172 // form `backlinks` emits, dropping the seed's self-link.
173 if types.is_empty() && layer.is_none() {
174 let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
175 for rel in store.find_links_to(path)? {
176 if !is_content_rel(&rel) {
177 continue;
178 }
179 let linker = normalize_target(&rel);
180 if linker.is_empty() || linker == target {
181 // A file never counts as its own backlink.
182 continue;
183 }
184 hits.insert(PathBuf::from(linker));
185 }
186 return Ok(hits.into_iter().collect());
187 }
188
189 // Scoped: read only the named folder(s)' sidecars for the candidate set, then
190 // confirm each candidate with a single-file parse — O(folder), the I/O scope
191 // `--type` / `--in` buys.
192 let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
193 for candidate in candidate_records(store, types, layer)? {
194 let rel = &candidate.path;
195 let candidate_target = normalize_target(rel);
196 if candidate_target.is_empty() || candidate_target == target {
197 // A file never counts as its own backlink.
198 continue;
199 }
200 // Confirm the edge by parsing the candidate file the same way
201 // forwardlinks does (body + all frontmatter), so body/typed-field links
202 // are caught — the sidecar's `links` field alone would miss them.
203 if file_links_to(store, rel, &target)? {
204 hits.insert(PathBuf::from(candidate_target));
205 }
206 }
207
208 Ok(hits.into_iter().collect())
209}
210
211/// Outgoing edges from `path`: the wiki-link targets extracted from that single
212/// file. Loop-fast; follow the evidence chain.
213///
214/// `path` is the store-relative path of the file to read. Targets are returned
215/// as store-relative paths (bare, no `.md`), deduped and sorted; the file's
216/// links to itself are dropped. A missing file yields an empty list (a
217/// dangling seed has no outgoing edges to report — broken-link detection is
218/// [`crate::validate`]'s job).
219pub fn forwardlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
220 let self_target = normalize_target(path);
221 let abs = match resolve_existing(store, path) {
222 Some(a) => a,
223 None => return Ok(Vec::new()),
224 };
225 let body = match std::fs::read_to_string(&abs) {
226 Ok(b) => b,
227 // A file that isn't valid UTF-8 (e.g. a binary source) carries no
228 // wiki-links we can extract.
229 Err(e) if e.kind() == io::ErrorKind::InvalidData => return Ok(Vec::new()),
230 Err(e) => return Err(StoreError::Io(e)),
231 };
232
233 let mut out: BTreeSet<PathBuf> = BTreeSet::new();
234 for target in extract_link_targets(&body) {
235 if target.is_empty() || target == self_target {
236 continue;
237 }
238 out.insert(PathBuf::from(target));
239 }
240 Ok(out.into_iter().collect())
241}
242
243/// The candidate set for an incoming-edge scan: the sidecar records that could
244/// link to the target, read from the type-folder `index.jsonl` sidecars (never
245/// a content-tree walk). `types`/`layer` narrow *which* sidecars are read — the
246/// I/O scope that keeps a typed/layer backlinks O(folder).
247///
248/// - `types` non-empty: read each type's folder sidecar (via [`Query`], which
249/// sits on [`Store::read_type_index`]), optionally layer-scoped, and union the
250/// records by path (a file appears once even if two type reads surface it).
251/// - `types` empty: every sidecar record under `layer` (or store-wide when
252/// `None`) via [`Store::sidecar_records`].
253fn candidate_records(
254 store: &Store,
255 types: &[String],
256 layer: Option<Layer>,
257) -> Result<Vec<IndexRecord>, StoreError> {
258 if types.is_empty() {
259 return store.sidecar_records(layer);
260 }
261 let mut by_path: std::collections::BTreeMap<PathBuf, IndexRecord> =
262 std::collections::BTreeMap::new();
263 for type_ in types {
264 let mut q = Query::new().with_type(type_);
265 if let Some(layer) = layer {
266 q = q.with_layer(layer);
267 }
268 for rec in q.execute(store)? {
269 by_path.insert(rec.path.clone(), rec);
270 }
271 }
272 Ok(by_path.into_values().collect())
273}
274
275/// True if the store file at `rel` carries a wiki-link whose canonical target
276/// equals `target`. Delegates to [`forwardlinks`] so the incoming-edge predicate
277/// is *exactly* the outgoing-edge extraction — body + every frontmatter field —
278/// keeping the two directions on one edge set. `forwardlinks` already emits
279/// canonical bare targets, so `target` (likewise normalized by the caller) is
280/// compared directly. A missing/binary file links to nothing.
281fn file_links_to(store: &Store, rel: &Path, target: &str) -> Result<bool, StoreError> {
282 let edges = forwardlinks(store, rel)?;
283 Ok(edges.iter().any(|e| e.as_os_str() == target))
284}
285
286/// **Context hydration.** Bounded BFS from `seed` over backlinks + forwardlinks
287/// out to `hops`, reading each reached file's `summary` + relationship, and
288/// returning a readable [`ContextSlice`]. Optionally filtered by `types` and
289/// `direction`. On-demand; no maintained graph. What the agent reaches for to
290/// assemble a working set in one call.
291///
292/// Traversal semantics:
293/// - **`hops`** bounds true graph distance from the seed. `hops == 0` returns
294/// an empty slice (the seed alone is no context).
295/// - **`direction`** selects which edges are followed: `Incoming` walks
296/// backlinks, `Outgoing` walks forwardlinks, `Both` walks the union.
297/// - **`types`**, when non-empty, filters which reached nodes appear in the
298/// slice — but traversal still passes *through* off-type nodes, so a
299/// `meeting` two hops out is still reachable through a `contact` even when
300/// filtering to `meeting`. (An empty `types` slice imposes no filter.)
301/// - Each node records the lowest hop count at which it is first reached (BFS
302/// order); the seed is never included as a node.
303pub fn neighborhood(
304 store: &Store,
305 seed: &Path,
306 hops: u32,
307 types: &[String],
308 direction: Direction,
309) -> Result<ContextSlice, StoreError> {
310 let seed_rel = PathBuf::from(normalize_target(seed));
311 let type_filter: HashSet<&str> = types.iter().map(|s| s.as_str()).collect();
312
313 // `discovered` guards against revisiting a node (and against re-adding the
314 // seed). BFS by levels so the first time we reach a node is its true min
315 // hop distance.
316 let mut discovered: HashSet<PathBuf> = HashSet::new();
317 discovered.insert(seed_rel.clone());
318
319 let mut nodes: Vec<ContextNode> = Vec::new();
320 let mut frontier: VecDeque<PathBuf> = VecDeque::new();
321 frontier.push_back(seed_rel.clone());
322
323 let mut hop = 0u32;
324 while hop < hops && !frontier.is_empty() {
325 hop += 1;
326 let level_size = frontier.len();
327 for _ in 0..level_size {
328 let current = frontier.pop_front().expect("frontier non-empty");
329
330 // Collect this node's edges in the requested direction(s). Each
331 // edge carries the neighbor path + the direction we traversed it.
332 let mut edges: Vec<(PathBuf, Direction)> = Vec::new();
333 if matches!(direction, Direction::Outgoing | Direction::Both) {
334 for nbr in forwardlinks(store, ¤t)? {
335 edges.push((nbr, Direction::Outgoing));
336 }
337 }
338 if matches!(direction, Direction::Incoming | Direction::Both) {
339 for nbr in backlinks(store, ¤t)? {
340 edges.push((nbr, Direction::Incoming));
341 }
342 }
343
344 for (neighbor, dir) in edges {
345 if !discovered.insert(neighbor.clone()) {
346 continue;
347 }
348 let (summary, type_) = read_summary_and_type(store, &neighbor);
349 let include = type_filter.is_empty()
350 || type_
351 .as_deref()
352 .map(|t| type_filter.contains(t))
353 .unwrap_or(false);
354 if include {
355 nodes.push(ContextNode {
356 path: neighbor.clone(),
357 summary,
358 type_,
359 hops: hop,
360 via: Some((current.clone(), dir)),
361 });
362 }
363 // Off-type nodes are not emitted but still seed the next BFS
364 // level, so the type filter narrows the *result*, not the
365 // reachable graph.
366 frontier.push_back(neighbor);
367 }
368 }
369 }
370
371 Ok(ContextSlice {
372 seed: seed_rel,
373 nodes,
374 })
375}
376
377/// **SWEEP.** Content files with no incoming AND no outgoing wiki-links — the
378/// curation worklist ("ingested but not yet wired into the wiki"). Off the
379/// loop. Optionally scoped to a layer.
380///
381/// A file is an orphan iff it neither links out to another store file nor is
382/// linked to by one. Incoming edges are counted across the *whole* store
383/// (a link from any layer un-orphans a file), even when `layer` scopes the
384/// candidate set. Returns store-relative paths, sorted.
385pub fn orphans(store: &Store, layer: Option<Layer>) -> Result<Vec<PathBuf>, StoreError> {
386 // One walk of the whole store: for every content file, record (a) whether
387 // it has any outgoing link, and (b) accumulate the set of every target any
388 // file links to (its incoming-edge set). Both come from a single read per
389 // file — the SWEEP cost.
390 let all = walk_content_files(store)?;
391
392 let mut linked_to: HashSet<PathBuf> = HashSet::new();
393 let mut has_outgoing: HashMap<PathBuf, bool> = HashMap::new();
394
395 for abs in &all {
396 let rel = match rel_path(store, abs) {
397 Some(r) => r,
398 None => continue,
399 };
400 let self_target = normalize_target(&rel);
401
402 let body = match std::fs::read_to_string(abs) {
403 Ok(b) => b,
404 Err(e) if e.kind() == io::ErrorKind::InvalidData => String::new(),
405 Err(e) => return Err(StoreError::Io(e)),
406 };
407
408 let mut outgoing = false;
409 for target in extract_link_targets(&body) {
410 if target.is_empty() || target == self_target {
411 continue;
412 }
413 if resolve_existing(store, Path::new(&target)).is_none() {
414 continue;
415 }
416 outgoing = true;
417 linked_to.insert(PathBuf::from(target));
418 }
419 has_outgoing.insert(rel, outgoing);
420 }
421
422 let mut out: BTreeSet<PathBuf> = BTreeSet::new();
423 for abs in &all {
424 let rel = match rel_path(store, abs) {
425 Some(r) => r,
426 None => continue,
427 };
428 if let Some(layer) = layer {
429 if path_layer(&rel) != Some(layer) {
430 continue;
431 }
432 }
433 let outgoing = has_outgoing.get(&rel).copied().unwrap_or(false);
434 let incoming = linked_to.contains(&PathBuf::from(normalize_target(&rel)));
435 if !outgoing && !incoming {
436 out.insert(rel);
437 }
438 }
439
440 Ok(out.into_iter().collect())
441}
442
443/// **Write-side.** Rewrite every incoming `[[old]]` wiki-link in `text` to
444/// `[[new]]`, preserving any `|display` override and emitting the canonical bare
445/// target (no `.md`). The write-side twin of [`backlinks`]: where `backlinks`
446/// *finds* the files carrying an edge to `old`, this *retargets* that edge to
447/// `new` inside one file's contents.
448///
449/// `old` and `new` are store-relative paths in the wiki-link sense — both are
450/// passed through the same [`normalize_target`] the read side keys on, so the
451/// `.md` and bare spellings of `old` collapse to one target and a match here is
452/// exactly a match [`backlinks`] / [`Store::find_links_to`](crate::Store::find_links_to)
453/// would report. A link is rewritten iff its normalized target equals
454/// `normalize_target(old)`; prefix collisions (`old=a/b` vs `[[a/bc]]`) and
455/// short-form links never match. Returns the rewritten text (identical to the
456/// input when nothing matched), so the caller can cheaply detect a no-op.
457///
458/// Operates on the raw bytes (not a parser round-trip) so a link in frontmatter
459/// or body is retargeted uniformly and nothing else is reflowed.
460pub fn rewrite_links_to(text: &str, old: &Path, new: &Path) -> String {
461 let old_target = normalize_target(old);
462 let new_target = normalize_target(new);
463 if old_target.is_empty() {
464 // No target to match → never rewrite anything.
465 return text.to_string();
466 }
467
468 let re = rewrite_link_re();
469 let mut out = String::with_capacity(text.len());
470 let mut last = 0usize;
471 for caps in re.captures_iter(text) {
472 let whole = caps.get(0).expect("group 0 always present");
473 let raw_target = caps.get(1).map(|m| m.as_str()).unwrap_or("");
474 // Match on the SAME normalized key the read side uses, so `[[old]]`,
475 // `[[old.md]]`, and `[[./old]]` all retarget and a prefix like
476 // `[[old-jr]]` never does.
477 if normalize_target(Path::new(raw_target)) != old_target {
478 continue;
479 }
480 // Copy the gap since the previous rewrite verbatim, then the rebuilt
481 // link: canonical bare new target + the original display, if any.
482 out.push_str(&text[last..whole.start()]);
483 out.push_str("[[");
484 out.push_str(&new_target);
485 if let Some(display) = caps.get(2) {
486 out.push('|');
487 out.push_str(display.as_str());
488 }
489 out.push_str("]]");
490 last = whole.end();
491 }
492 out.push_str(&text[last..]);
493 out
494}
495
496// ── Private helpers ─────────────────────────────────────────────────────────
497
498/// The wiki-link regex used by the **write side** ([`rewrite_links_to`]):
499/// captures the target (group 1) AND the optional `|display` (group 2) so a
500/// rewrite can re-emit the display verbatim. The target/display character
501/// classes match [`wiki_link_re`] exactly, so the write side recognizes a link
502/// iff the read side does — the two never disagree on what a `[[…]]` is.
503///
504/// Compiled once for the process via [`OnceLock`] — `rewrite_links_to` runs on
505/// the rename/link write path, so recompiling per call would be wasteful.
506fn rewrite_link_re() -> &'static Regex {
507 static RE: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
508 RE.get_or_init(|| {
509 Regex::new(r"\[\[([^\]\|\n]+?)(?:\|([^\]\n]*))?\]\]")
510 .expect("static wiki-link rewrite regex compiles")
511 })
512}
513
514/// Normalize a store-relative path into the canonical wiki-link target form:
515/// forward slashes, no leading `./` or `/`, and no trailing `.md`. This is the
516/// single key that incoming/outgoing edges are compared on, so the `.md` and
517/// bare forms of a target unify.
518fn normalize_target(path: &Path) -> String {
519 let mut s = path.to_string_lossy().replace('\\', "/");
520 while let Some(rest) = s.strip_prefix("./") {
521 s = rest.to_string();
522 }
523 let s = s.trim_start_matches('/');
524 let s = s.strip_suffix(".md").unwrap_or(s);
525 s.trim().to_string()
526}
527
528/// The wiki-link regex: `[[target]]` / `[[target|display]]`. Captures the raw
529/// target (group 1). Compiled once for the process via [`OnceLock`] —
530/// `extract_link_targets` runs on the O(changed) loop path (backlinks /
531/// forwardlinks / neighborhood), so the compile must not repeat per call.
532fn wiki_link_re() -> &'static Regex {
533 // target = anything up to the first `|` or `]`. display (optional) is
534 // discarded. Matches across a single line/body slice.
535 static RE: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
536 RE.get_or_init(|| {
537 Regex::new(r"\[\[([^\]\|\n]+?)(?:\|[^\]\n]*)?\]\]")
538 .expect("static wiki-link regex compiles")
539 })
540}
541
542/// Extract every wiki-link target from a body, normalized to the canonical
543/// store-relative form. Order-preserving; duplicates kept (callers dedup).
544fn extract_link_targets(body: &str) -> Vec<String> {
545 let re = wiki_link_re();
546 re.captures_iter(body)
547 .filter_map(|c| c.get(1))
548 .map(|m| normalize_target(Path::new(m.as_str().trim())))
549 .filter(|t| !t.is_empty())
550 .collect()
551}
552
553/// Resolve the store root + a store-relative path to the absolute on-disk file,
554/// trying the path as written and then with a `.md` extension. `None` if
555/// neither exists.
556fn resolve_existing(store: &Store, store_relative: &Path) -> Option<PathBuf> {
557 let direct = store.root.join(store_relative);
558 if direct.is_file() {
559 return Some(direct);
560 }
561 let normalized = normalize_target(store_relative);
562 let with_md = store.root.join(format!("{normalized}.md"));
563 if with_md.is_file() {
564 return Some(with_md);
565 }
566 None
567}
568
569/// Convert an absolute path under the store root into its store-relative form.
570fn rel_path(store: &Store, abs: &Path) -> Option<PathBuf> {
571 abs.strip_prefix(&store.root).ok().map(|p| p.to_path_buf())
572}
573
574/// Which layer a store-relative path sits in, by its first component.
575fn path_layer(rel: &Path) -> Option<Layer> {
576 let first = rel.components().next()?;
577 match first.as_os_str().to_str()? {
578 "sources" => Some(Layer::Sources),
579 "records" => Some(Layer::Records),
580 "wiki" => Some(Layer::Wiki),
581 _ => None,
582 }
583}
584
585/// True if a store-relative path is a *content* file: under `sources/`,
586/// `records/`, or `wiki/`, a `.md` file, and not an `index.md`. Meta files
587/// (`DB.md`, `log.md`, `log/…`, sidecars) are excluded.
588fn is_content_rel(rel: &Path) -> bool {
589 if path_layer(rel).is_none() {
590 return false;
591 }
592 match rel.extension().and_then(|e| e.to_str()) {
593 Some("md") => {}
594 _ => return false,
595 }
596 rel.file_name().and_then(|n| n.to_str()) != Some("index.md")
597}
598
599/// Walk every content `.md` file in the store via the **`ignore`** walker
600/// (the ripgrep directory engine). Only the three layer roots
601/// (`sources/`/`records/`/`wiki/`) are descended, so `DB.md`, `log.md`, and
602/// `log/` at the store root are structurally never reached; hidden dirs and
603/// per-folder `index.md` sidecars are filtered out ([`is_content_rel`]). Honors
604/// `.gitignore` the way `rg` does. Returns absolute paths. SWEEP-class.
605fn walk_content_files(store: &Store) -> Result<Vec<PathBuf>, StoreError> {
606 let mut out = Vec::new();
607 for layer in Layer::all() {
608 let dir = store.root.join(layer_dir_name(layer));
609 if !dir.is_dir() {
610 continue;
611 }
612 let walker = WalkBuilder::new(&dir)
613 .hidden(true)
614 .git_ignore(true)
615 .git_global(false)
616 .require_git(false)
617 .build();
618 for result in walker {
619 let entry = result.map_err(|e| StoreError::Search {
620 root: store.root.clone(),
621 message: format!("walk failed: {e}"),
622 })?;
623 if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
624 continue;
625 }
626 let abs = entry.into_path();
627 if let Some(rel) = rel_path(store, &abs) {
628 if is_content_rel(&rel) {
629 out.push(abs);
630 }
631 }
632 }
633 }
634 Ok(out)
635}
636
637/// The on-disk folder name for a layer. Mirrors `Layer::dir_name`; kept local
638/// so the graph module owns its own copy rather than coupling to that body.
639fn layer_dir_name(layer: Layer) -> &'static str {
640 match layer {
641 Layer::Sources => "sources",
642 Layer::Records => "records",
643 Layer::Wiki => "wiki",
644 }
645}
646
647/// Read a reached node's `summary` and `type` from its frontmatter. A missing
648/// file, missing frontmatter, or unparseable YAML degrades to an empty summary
649/// / unknown type rather than failing the whole hydration — `neighborhood` is
650/// best-effort context assembly, not validation.
651fn read_summary_and_type(store: &Store, rel: &Path) -> (String, Option<String>) {
652 let abs = match resolve_existing(store, rel) {
653 Some(a) => a,
654 None => return (String::new(), None),
655 };
656 let text = match std::fs::read_to_string(&abs) {
657 Ok(t) => t,
658 Err(_) => return (String::new(), None),
659 };
660 let yaml = match frontmatter_block(&text) {
661 Some(y) => y,
662 None => return (String::new(), None),
663 };
664 let value: serde_norway::Value = match serde_norway::from_str(yaml) {
665 Ok(v) => v,
666 Err(_) => return (String::new(), None),
667 };
668 let summary = value
669 .get("summary")
670 .and_then(|v| v.as_str())
671 .unwrap_or("")
672 .to_string();
673 let type_ = value
674 .get("type")
675 .and_then(|v| v.as_str())
676 .map(|s| s.to_string());
677 (summary, type_)
678}
679
680/// Return the YAML between the opening and closing `---` fences (exclusive), or
681/// `None` if the text has no leading frontmatter block. Local mirror of the
682/// parser's split so the graph module stays self-contained.
683fn frontmatter_block(text: &str) -> Option<&str> {
684 let rest = text
685 .strip_prefix("---\n")
686 .or_else(|| text.strip_prefix("---\r\n"))?;
687 // Find the closing fence: a line that is exactly `---`.
688 let mut idx = 0usize;
689 for line in rest.split_inclusive('\n') {
690 let trimmed = line.trim_end_matches(['\r', '\n']);
691 if trimmed == "---" {
692 return Some(&rest[..idx]);
693 }
694 idx += line.len();
695 }
696 None
697}
698
699#[cfg(test)]
700mod tests {
701 use super::*;
702 use std::fs;
703 use tempfile::TempDir;
704
705 use crate::parser::Config;
706
707 // ── Fixture builder ─────────────────────────────────────────────────────
708 //
709 // A real on-disk store in a tempdir. We write actual files (frontmatter +
710 // wiki-links) and exercise the real code paths. The fixture constructs the
711 // `Store` by its public fields rather than `Store::open`, so the graph
712 // tests stand on their own and do not depend on any other module's
713 // behavior. Each test asserts the behavior the SPEC promises, derived from
714 // intent, never from echoing the function's own output.
715 //
716 // `backlinks` (and `neighborhood` in any incoming direction) enumerate their
717 // candidate set from the type-folder `index.jsonl` sidecars — the loop
718 // contract: never a whole-store content walk. A real db.md store maintains
719 // those sidecars write-through, so a test that exercises backlinks must call
720 // [`Fixture::reindex`] after writing its files to build them (the SWEEP that
721 // `dbmd index rebuild` runs). Forwardlinks/orphans read content directly and
722 // need no sidecar.
723
724 struct Fixture {
725 _tmp: TempDir,
726 store: Store,
727 }
728
729 impl Fixture {
730 fn new() -> Self {
731 let tmp = TempDir::new().expect("tempdir");
732 let root = tmp.path().to_path_buf();
733 fs::write(root.join("DB.md"), "---\ntype: db-md\n---\n# store\n").expect("DB.md");
734 let store = Store {
735 root,
736 config: Config::default(),
737 };
738 Fixture { _tmp: tmp, store }
739 }
740
741 /// Write a content file at a store-relative path with the given type,
742 /// summary, and body. Creates parent dirs.
743 fn write(&self, rel: &str, type_: &str, summary: &str, body: &str) {
744 let abs = self.store.root.join(rel);
745 fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
746 let contents = format!(
747 "---\ntype: {type_}\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: {summary}\n---\n{body}\n"
748 );
749 fs::write(&abs, contents).expect("write file");
750 }
751
752 /// Write a raw file verbatim (for frontmatter-shape edge cases).
753 fn write_raw(&self, rel: &str, contents: &str) {
754 let abs = self.store.root.join(rel);
755 fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
756 fs::write(&abs, contents).expect("write raw");
757 }
758
759 /// Build the type-folder `index.jsonl` sidecars from the content written
760 /// so far — the state a real store is always in (write-through), and the
761 /// candidate set `backlinks` reads. Call after writing files in any test
762 /// that exercises `backlinks` or an incoming-direction `neighborhood`.
763 fn reindex(&self) {
764 crate::index::Index::rebuild_all(&self.store).expect("rebuild sidecars");
765 }
766
767 fn p(&self, rel: &str) -> PathBuf {
768 PathBuf::from(rel)
769 }
770 }
771
772 fn paths(v: &[PathBuf]) -> Vec<String> {
773 v.iter()
774 .map(|p| p.to_string_lossy().replace('\\', "/"))
775 .collect()
776 }
777
778 // ── normalize_target ────────────────────────────────────────────────────
779
780 #[test]
781 fn normalize_strips_md_and_leading_dotslash() {
782 assert_eq!(
783 normalize_target(Path::new("records/contacts/sarah.md")),
784 "records/contacts/sarah"
785 );
786 assert_eq!(
787 normalize_target(Path::new("./wiki/people/elena")),
788 "wiki/people/elena"
789 );
790 assert_eq!(normalize_target(Path::new("/records/x")), "records/x");
791 // Bare and `.md` forms must collapse to the same key, or edges won't unify.
792 assert_eq!(
793 normalize_target(Path::new("a/b")),
794 normalize_target(Path::new("a/b.md"))
795 );
796 }
797
798 // ── extract_link_targets (forwardlinks core) ────────────────────────────
799
800 #[test]
801 fn extract_handles_display_text_and_md_suffix() {
802 let body = "See [[wiki/people/sarah-chen|Sarah]] and [[records/contacts/elena.md]].";
803 let got = extract_link_targets(body);
804 assert_eq!(
805 got,
806 vec!["wiki/people/sarah-chen", "records/contacts/elena"]
807 );
808 }
809
810 #[test]
811 fn extract_ignores_external_markdown_links() {
812 // Standard markdown links are NOT wiki-links and must not be extracted
813 // (SPEC: external refs don't participate in the graph).
814 let body = "[Acme](https://acme.io) but [[records/companies/acme]] is internal.";
815 let got = extract_link_targets(body);
816 assert_eq!(got, vec!["records/companies/acme"]);
817 }
818
819 #[test]
820 fn extract_display_text_is_not_treated_as_a_target() {
821 // A `|display` segment that looks path-like must not become a target;
822 // only the part before `|` is the link target.
823 let body = "[[records/contacts/sarah|sources/emails/decoy]]";
824 let got = extract_link_targets(body);
825 assert_eq!(got, vec!["records/contacts/sarah"]);
826 }
827
828 // ── rewrite_links_to (write-side twin of backlinks) ─────────────────────
829
830 #[test]
831 fn rewrite_plain_link_to_canonical_new_target() {
832 let got = rewrite_links_to(
833 "See [[records/contacts/sarah-chen]] today.",
834 Path::new("records/contacts/sarah-chen"),
835 Path::new("records/contacts/sarah-chen-acme"),
836 );
837 assert_eq!(got, "See [[records/contacts/sarah-chen-acme]] today.");
838 }
839
840 #[test]
841 fn rewrite_preserves_display_override() {
842 let got = rewrite_links_to(
843 "With [[records/contacts/sarah-chen|Sarah]].",
844 Path::new("records/contacts/sarah-chen"),
845 Path::new("records/contacts/sarah-chen-acme"),
846 );
847 assert_eq!(got, "With [[records/contacts/sarah-chen-acme|Sarah]].");
848 }
849
850 #[test]
851 fn rewrite_matches_md_suffixed_old_and_emits_bare_new() {
852 // The `.md` spelling of the old target must match (it normalizes to the
853 // same key the read side uses), and the new target is emitted bare —
854 // the writer doctrine validate enforces (`WIKI_LINK_HAS_EXTENSION`).
855 let got = rewrite_links_to(
856 "[[records/contacts/sarah-chen.md]]",
857 Path::new("records/contacts/sarah-chen"),
858 Path::new("records/contacts/new.md"),
859 );
860 assert_eq!(got, "[[records/contacts/new]]");
861 }
862
863 #[test]
864 fn rewrite_leaves_prefix_collisions_and_short_form_untouched() {
865 // Boundary correctness, anchored to the SAME normalize_target the read
866 // side keys on: `records/contacts/sarah-chen` must NOT match the longer
867 // `[[…-jr]]`, the short-form `[[sarah-chen]]`, or an unrelated target.
868 let input = "[[records/contacts/sarah-chen-jr]] [[sarah-chen]] [[wiki/topics/x]]";
869 let got = rewrite_links_to(
870 input,
871 Path::new("records/contacts/sarah-chen"),
872 Path::new("records/contacts/new"),
873 );
874 assert_eq!(got, input, "no genuine edge to the seed → text unchanged");
875 }
876
877 #[test]
878 fn rewrite_handles_multiple_occurrences_and_mixed_spellings() {
879 let got = rewrite_links_to(
880 "[[records/x]] then [[./records/x]] and [[records/x.md|d]] end",
881 Path::new("records/x"),
882 Path::new("records/y"),
883 );
884 // All three spellings of the same target retarget; the display survives.
885 assert_eq!(
886 got,
887 "[[records/y]] then [[records/y]] and [[records/y|d]] end"
888 );
889 }
890
891 #[test]
892 fn rewrite_retargets_exactly_the_edges_the_core_parser_sees() {
893 // The load-bearing property of moving the rewrite into core: the write
894 // side must operate on EXACTLY the edge set the read side recognizes —
895 // the same `extract_link_targets` / `normalize_target` grammar that
896 // `forwardlinks` is built on. Anchor the test to that grammar (via
897 // `forwardlinks` on a real file) rather than re-listing literals, so a
898 // future divergence between the read parser and the write rewrite fails
899 // here. (Coupled to `forwardlinks` — the single-file edge extractor —
900 // not the multi-file `backlinks` traversal, so it tests the grammar, not
901 // the walk.)
902 let fx = Fixture::new();
903 let body = "Met [[records/contacts/sarah.md|Sarah]] and not [[records/contacts/sarah-2]].";
904 fx.write("wiki/people/bio.md", "wiki-page", "bio", body);
905
906 // Read side: the parser sees two outgoing edges, both in canonical bare
907 // form (the `.md` spelling collapsed). `sarah` is a real edge here.
908 let edges = forwardlinks(&fx.store, &fx.p("wiki/people/bio.md")).unwrap();
909 assert_eq!(
910 paths(&edges),
911 vec!["records/contacts/sarah", "records/contacts/sarah-2"],
912 "fixture must contain exactly the two edges this test reasons about"
913 );
914
915 // Write side: rewriting `sarah → sarah-chen` must retarget the edge the
916 // parser recognized (matching the `.md` spelling), preserve the display,
917 // and leave the unrelated `sarah-2` edge untouched.
918 let got = rewrite_links_to(
919 body,
920 Path::new("records/contacts/sarah"),
921 Path::new("records/contacts/sarah-chen"),
922 );
923 assert_eq!(
924 got,
925 "Met [[records/contacts/sarah-chen|Sarah]] and not [[records/contacts/sarah-2]]."
926 );
927
928 // Cross-check through the parser: the rewritten text's edge set is the
929 // original with `sarah` swapped for `sarah-chen` — proving the rewrite
930 // moved exactly one edge, the one the read side keyed on.
931 fx.write("wiki/people/bio.md", "wiki-page", "bio", &got);
932 let after = forwardlinks(&fx.store, &fx.p("wiki/people/bio.md")).unwrap();
933 assert_eq!(
934 paths(&after),
935 vec!["records/contacts/sarah-2", "records/contacts/sarah-chen"],
936 "after rewrite the parser must see the new target and not the old"
937 );
938 }
939
940 #[test]
941 fn rewrite_empty_old_target_is_a_no_op() {
942 // A degenerate `old` (normalizes to empty) must never rewrite anything,
943 // mirroring backlinks' empty-target guard.
944 let input = "[[records/x]] [[]] text";
945 let got = rewrite_links_to(input, Path::new(""), Path::new("records/y"));
946 assert_eq!(got, input);
947 }
948
949 #[test]
950 fn rewrite_no_match_returns_input_unchanged() {
951 let input = "no links, [external](https://x), and [[wiki/topics/y]]";
952 let got = rewrite_links_to(input, Path::new("records/x"), Path::new("records/z"));
953 assert_eq!(got, input);
954 }
955
956 // ── forwardlinks ─────────────────────────────────────────────────────────
957
958 #[test]
959 fn forwardlinks_returns_sorted_deduped_targets_excluding_self() {
960 let fx = Fixture::new();
961 fx.write(
962 "wiki/projects/renewal.md",
963 "wiki-page",
964 "Renewal project",
965 "Links: [[records/contacts/sarah]] [[records/companies/acme]] [[records/contacts/sarah]] and itself [[wiki/projects/renewal]].",
966 );
967 // The targets need not exist on disk for forwardlinks (it reads the one
968 // file only). Self-links are dropped; duplicates collapse; sorted asc.
969 let got = forwardlinks(&fx.store, &fx.p("wiki/projects/renewal.md")).unwrap();
970 assert_eq!(
971 paths(&got),
972 vec!["records/companies/acme", "records/contacts/sarah"]
973 );
974 }
975
976 #[test]
977 fn forwardlinks_picks_up_wiki_links_in_frontmatter() {
978 // SPEC: wiki-links appear in scalar + block-sequence frontmatter fields,
979 // not just the body. forwardlinks must follow those edges too.
980 let fx = Fixture::new();
981 fx.write_raw(
982 "records/meetings/m1.md",
983 "---\ntype: meeting\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Renewal sync\ncompany: [[records/companies/acme]]\nattendees:\n - [[records/contacts/sarah]]\n - [[records/contacts/elena]]\n---\nNotes about [[wiki/projects/renewal]].\n",
984 );
985 let got = forwardlinks(&fx.store, &fx.p("records/meetings/m1.md")).unwrap();
986 assert_eq!(
987 paths(&got),
988 vec![
989 "records/companies/acme",
990 "records/contacts/elena",
991 "records/contacts/sarah",
992 "wiki/projects/renewal",
993 ]
994 );
995 }
996
997 #[test]
998 fn forwardlinks_missing_file_is_empty_not_error() {
999 let fx = Fixture::new();
1000 let got = forwardlinks(&fx.store, &fx.p("wiki/people/ghost.md")).unwrap();
1001 assert!(got.is_empty());
1002 }
1003
1004 #[test]
1005 fn forwardlinks_resolves_seed_given_without_md_extension() {
1006 let fx = Fixture::new();
1007 fx.write(
1008 "wiki/people/sarah.md",
1009 "wiki-page",
1010 "Sarah bio",
1011 "Works at [[records/companies/acme]].",
1012 );
1013 // Seed passed in bare wiki-link form (no `.md`) must still resolve.
1014 let got = forwardlinks(&fx.store, &fx.p("wiki/people/sarah")).unwrap();
1015 assert_eq!(paths(&got), vec!["records/companies/acme"]);
1016 }
1017
1018 // ── backlinks ──────────────────────────────────────────────────────────
1019
1020 #[test]
1021 fn backlinks_finds_incoming_across_layers_and_link_forms() {
1022 let fx = Fixture::new();
1023 // Target.
1024 fx.write("records/contacts/sarah.md", "contact", "Sarah Chen", "");
1025 // Three different incoming-link spellings, all to the same target.
1026 fx.write(
1027 "wiki/people/sarah.md",
1028 "wiki-page",
1029 "bio",
1030 "See [[records/contacts/sarah]].",
1031 );
1032 fx.write(
1033 "records/meetings/m1.md",
1034 "meeting",
1035 "Renewal call",
1036 "Attendee [[records/contacts/sarah|Sarah]].",
1037 );
1038 fx.write(
1039 "sources/emails/e1.md",
1040 "email",
1041 "Hi",
1042 "From [[records/contacts/sarah.md]] today.",
1043 );
1044 // A file that links to a DIFFERENT contact must not be a backlink.
1045 fx.write(
1046 "wiki/people/other.md",
1047 "wiki-page",
1048 "x",
1049 "[[records/contacts/sarah-2]]",
1050 );
1051 fx.reindex();
1052
1053 // All three link forms ([[x]], [[x|d]], [[x.md]]) resolve to the same
1054 // target and are found; the linkers are returned in canonical bare form.
1055 let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1056 assert_eq!(
1057 paths(&got),
1058 vec![
1059 "records/meetings/m1",
1060 "sources/emails/e1",
1061 "wiki/people/sarah",
1062 ]
1063 );
1064 }
1065
1066 #[test]
1067 fn backlinks_and_forwardlinks_round_trip_on_same_key() {
1068 // If A forwardlinks to B, then B backlinks to A — both expressed in the
1069 // identical bare key, so neighborhood can dedup across directions.
1070 let fx = Fixture::new();
1071 fx.write(
1072 "wiki/people/a.md",
1073 "wiki-page",
1074 "A",
1075 "Knows [[wiki/people/b]].",
1076 );
1077 fx.write("wiki/people/b.md", "wiki-page", "B", "");
1078 fx.reindex();
1079 let fwd = forwardlinks(&fx.store, &fx.p("wiki/people/a.md")).unwrap();
1080 let back = backlinks(&fx.store, &fx.p("wiki/people/b.md")).unwrap();
1081 assert_eq!(paths(&fwd), vec!["wiki/people/b"]);
1082 assert_eq!(paths(&back), vec!["wiki/people/a"]);
1083 }
1084
1085 #[test]
1086 fn backlinks_does_not_match_path_prefix_collisions() {
1087 let fx = Fixture::new();
1088 fx.write("records/contacts/sam.md", "contact", "Sam", "");
1089 // `sam-smith` shares the `sam` prefix; must NOT count as a backlink to `sam`.
1090 fx.write(
1091 "wiki/people/x.md",
1092 "wiki-page",
1093 "x",
1094 "[[records/contacts/sam-smith]]",
1095 );
1096 // The genuine backlink.
1097 fx.write(
1098 "wiki/people/y.md",
1099 "wiki-page",
1100 "y",
1101 "[[records/contacts/sam]]",
1102 );
1103 fx.reindex();
1104
1105 let got = backlinks(&fx.store, &fx.p("records/contacts/sam")).unwrap();
1106 assert_eq!(paths(&got), vec!["wiki/people/y"]);
1107 }
1108
1109 #[test]
1110 fn backlinks_excludes_self_reference() {
1111 let fx = Fixture::new();
1112 // A page that links to itself is not its own backlink.
1113 fx.write(
1114 "wiki/synthesis/overview.md",
1115 "wiki-page",
1116 "Overview",
1117 "This page [[wiki/synthesis/overview]] references itself.",
1118 );
1119 fx.reindex();
1120 let got = backlinks(&fx.store, &fx.p("wiki/synthesis/overview.md")).unwrap();
1121 assert!(
1122 got.is_empty(),
1123 "self-link must not appear as a backlink, got {got:?}"
1124 );
1125 }
1126
1127 #[test]
1128 fn backlinks_empty_when_nobody_links() {
1129 let fx = Fixture::new();
1130 fx.write("records/contacts/lonely.md", "contact", "Lonely", "");
1131 fx.write(
1132 "wiki/people/unrelated.md",
1133 "wiki-page",
1134 "x",
1135 "[[records/companies/acme]]",
1136 );
1137 fx.reindex();
1138 let got = backlinks(&fx.store, &fx.p("records/contacts/lonely.md")).unwrap();
1139 assert!(got.is_empty());
1140 }
1141
1142 #[test]
1143 fn backlinks_ignores_index_and_meta_files() {
1144 let fx = Fixture::new();
1145 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1146 // An index.md that lists the target must NOT be reported as a backlink
1147 // (indexes are catalog, not relationship edges).
1148 fx.write_raw(
1149 "records/contacts/index.md",
1150 "---\ntype: index\nscope: folder\nfolder: records/contacts\n---\n- [[records/contacts/sarah]] — Sarah\n",
1151 );
1152 fx.reindex();
1153 let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1154 assert!(got.is_empty(), "index.md must be excluded, got {got:?}");
1155 }
1156
1157 #[test]
1158 fn backlinks_finds_body_only_edge_not_in_frontmatter_links_field() {
1159 // REGRESSION: the sidecar's `links` field carries only the file's
1160 // frontmatter `links:` list; it does NOT include wiki-links written in
1161 // the body or in other typed frontmatter fields. Answering backlinks
1162 // from `links[]` alone would silently miss this edge. The candidate set
1163 // is sidecar-bounded, but each candidate's edge is confirmed by parsing
1164 // the file (the same extraction forwardlinks uses), so a body-only link
1165 // must still register as a backlink.
1166 let fx = Fixture::new();
1167 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1168 // `meeting.md` links to sarah ONLY in its body — its frontmatter has no
1169 // `links:` field at all, so the sidecar record's `links` is empty.
1170 fx.write(
1171 "records/meetings/standup.md",
1172 "meeting",
1173 "Standup",
1174 "Discussed renewal with [[records/contacts/sarah]].",
1175 );
1176 fx.reindex();
1177
1178 // Guard the premise: the sidecar record really does carry an empty
1179 // `links` (so this test fails loudly if the index ever starts extracting
1180 // body links — at which point the backlink predicate could be revisited).
1181 let rec = fx
1182 .store
1183 .find_by_type("meeting")
1184 .unwrap()
1185 .into_iter()
1186 .find(|r| r.path == fx.p("records/meetings/standup.md"))
1187 .expect("meeting is catalogued in its sidecar");
1188 assert!(
1189 rec.links.is_empty(),
1190 "premise: the body link is NOT projected into the sidecar `links` field; got {:?}",
1191 rec.links
1192 );
1193
1194 // Yet backlinks still finds it — because it confirms via the file parse,
1195 // not via the sidecar `links` field.
1196 let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1197 assert_eq!(
1198 paths(&got),
1199 vec!["records/meetings/standup"],
1200 "a body-only wiki-link must register as a backlink"
1201 );
1202 }
1203
1204 #[test]
1205 fn backlinks_finds_edge_in_typed_frontmatter_field() {
1206 // A wiki-link inside a *typed* frontmatter field (`company:`) is a real
1207 // edge forwardlinks follows, so backlinks must find it too — even though
1208 // the sidecar's `links` field (the `links:` key only) does not list it.
1209 let fx = Fixture::new();
1210 fx.write("records/companies/acme.md", "company", "Acme", "");
1211 fx.write_raw(
1212 "records/contacts/sarah.md",
1213 "---\ntype: contact\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Sarah\ncompany: [[records/companies/acme]]\n---\nBody with no links.\n",
1214 );
1215 fx.reindex();
1216 let got = backlinks(&fx.store, &fx.p("records/companies/acme.md")).unwrap();
1217 assert_eq!(
1218 paths(&got),
1219 vec!["records/contacts/sarah"],
1220 "a wiki-link in a typed frontmatter field is an incoming edge"
1221 );
1222 }
1223
1224 #[test]
1225 fn backlinks_unscoped_scans_the_tree_not_only_the_sidecar() {
1226 // REGRESSION (loop budget): an UNSCOPED `backlinks` must resolve incoming
1227 // edges with a SINGLE embedded-ripgrep pass over the tree
1228 // (`Store::find_links_to`), NOT by reading the sidecar candidate set and
1229 // then `read_to_string`-confirming each candidate (which re-opens every
1230 // content file → O(store); the documented >3x budget miss). A ripgrep
1231 // pass is the same scan engine `validate`/`rename`/`dbmd links` ride, and
1232 // the tree — not the sidecar — is its ground truth: a linker that is on
1233 // disk but absent from every sidecar (stale / never-built index) is still
1234 // found. We assert that behaviorally, which fails loudly if the unscoped
1235 // path ever reverts to the sidecar-bounded per-candidate confirm loop
1236 // (that loop would NOT find the unindexed linker).
1237 let fx = Fixture::new();
1238 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1239 fx.write(
1240 "wiki/people/indexed.md",
1241 "wiki-page",
1242 "Indexed",
1243 "[[records/contacts/sarah]]",
1244 );
1245 fx.reindex(); // builds sidecars for sarah + the indexed linker
1246
1247 // Now drop a NEW linker on disk WITHOUT reindexing — it is on disk but in
1248 // no sidecar.
1249 fx.write(
1250 "wiki/people/unindexed.md",
1251 "wiki-page",
1252 "Unindexed",
1253 "[[records/contacts/sarah]]",
1254 );
1255
1256 let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1257 assert_eq!(
1258 paths(&got),
1259 vec!["wiki/people/indexed", "wiki/people/unindexed"],
1260 "unscoped backlinks ripgrep-scans the tree, so the on-disk-but-unindexed \
1261 linker is found too — not only the sidecar-catalogued one"
1262 );
1263 }
1264
1265 #[test]
1266 fn backlinks_scoped_candidates_come_from_the_sidecar_not_a_tree_walk() {
1267 // REGRESSION (scale contract): the SCOPED form (`--type` / `--in`) is the
1268 // I/O-scoped path — it enumerates candidates from the relevant type-folder
1269 // `index.jsonl` sidecars and parses only those, NOT a whole-tree walk.
1270 // That is what makes the scope an I/O scope, not just a result filter:
1271 // a linker that is on disk but ABSENT from the sidecar (stale / never-built
1272 // index) is NOT discovered by the scoped call (the sidecar bounds which
1273 // files are candidates). This is the loop-vs-walk distinction the SPEC
1274 // draws, and it is exactly the inverse of the unscoped tree scan above.
1275 let fx = Fixture::new();
1276 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1277 fx.write(
1278 "wiki/people/indexed.md",
1279 "wiki-page",
1280 "Indexed",
1281 "[[records/contacts/sarah]]",
1282 );
1283 fx.reindex(); // builds sidecars for sarah + the indexed linker
1284
1285 // Drop a NEW wiki-page linker on disk WITHOUT reindexing — on disk, in no
1286 // sidecar.
1287 fx.write(
1288 "wiki/people/unindexed.md",
1289 "wiki-page",
1290 "Unindexed",
1291 "[[records/contacts/sarah]]",
1292 );
1293
1294 // Scoped to the `wiki-page` type: the candidate set is the sidecar's, so
1295 // only the catalogued linker is found — the unindexed one is invisible.
1296 let only_wiki_pages = vec!["wiki-page".to_string()];
1297 let got = backlinks_filtered(
1298 &fx.store,
1299 &fx.p("records/contacts/sarah.md"),
1300 &only_wiki_pages,
1301 None,
1302 )
1303 .unwrap();
1304 assert_eq!(
1305 paths(&got),
1306 vec!["wiki/people/indexed"],
1307 "scoped backlinks reads the sidecar candidate set; the on-disk-but-unindexed \
1308 linker is not tree-walked"
1309 );
1310 }
1311
1312 #[test]
1313 fn backlinks_filtered_type_scopes_the_candidate_set() {
1314 // `--type` narrows backlinks to linkers of that type. Two files link to
1315 // the target — one `meeting`, one `wiki-page`; filtering to `meeting`
1316 // returns only the meeting.
1317 let fx = Fixture::new();
1318 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1319 fx.write(
1320 "records/meetings/m1.md",
1321 "meeting",
1322 "Call",
1323 "[[records/contacts/sarah]]",
1324 );
1325 fx.write(
1326 "wiki/people/bio.md",
1327 "wiki-page",
1328 "Bio",
1329 "[[records/contacts/sarah]]",
1330 );
1331 fx.reindex();
1332
1333 let only_meetings = vec!["meeting".to_string()];
1334 let got = backlinks_filtered(
1335 &fx.store,
1336 &fx.p("records/contacts/sarah.md"),
1337 &only_meetings,
1338 None,
1339 )
1340 .unwrap();
1341 assert_eq!(
1342 paths(&got),
1343 vec!["records/meetings/m1"],
1344 "--type meeting must exclude the wiki-page linker"
1345 );
1346
1347 // Unfiltered, both come back — proving the filter (not the data) dropped one.
1348 let all = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1349 assert_eq!(paths(&all), vec!["records/meetings/m1", "wiki/people/bio"]);
1350 }
1351
1352 #[test]
1353 fn backlinks_filtered_layer_scopes_the_candidate_set() {
1354 // `--in <layer>` narrows backlinks to linkers under that layer.
1355 let fx = Fixture::new();
1356 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1357 fx.write(
1358 "records/meetings/m1.md",
1359 "meeting",
1360 "Call",
1361 "[[records/contacts/sarah]]",
1362 );
1363 fx.write(
1364 "wiki/people/bio.md",
1365 "wiki-page",
1366 "Bio",
1367 "[[records/contacts/sarah]]",
1368 );
1369 fx.reindex();
1370
1371 let got = backlinks_filtered(
1372 &fx.store,
1373 &fx.p("records/contacts/sarah.md"),
1374 &[],
1375 Some(Layer::Wiki),
1376 )
1377 .unwrap();
1378 assert_eq!(
1379 paths(&got),
1380 vec!["wiki/people/bio"],
1381 "--in wiki must keep only the wiki-layer linker"
1382 );
1383
1384 let records_only = backlinks_filtered(
1385 &fx.store,
1386 &fx.p("records/contacts/sarah.md"),
1387 &[],
1388 Some(Layer::Records),
1389 )
1390 .unwrap();
1391 assert_eq!(paths(&records_only), vec!["records/meetings/m1"]);
1392 }
1393
1394 // ── neighborhood ─────────────────────────────────────────────────────────
1395
1396 #[test]
1397 fn neighborhood_hops_zero_is_empty() {
1398 let fx = Fixture::new();
1399 fx.write("wiki/people/a.md", "wiki-page", "A", "[[wiki/people/b]]");
1400 fx.write("wiki/people/b.md", "wiki-page", "B", "");
1401 let slice = neighborhood(
1402 &fx.store,
1403 &fx.p("wiki/people/a.md"),
1404 0,
1405 &[],
1406 Direction::Both,
1407 )
1408 .unwrap();
1409 assert_eq!(slice.seed, fx.p("wiki/people/a"));
1410 assert!(slice.nodes.is_empty());
1411 }
1412
1413 #[test]
1414 fn neighborhood_outgoing_one_hop_reads_summary_and_type() {
1415 let fx = Fixture::new();
1416 fx.write(
1417 "wiki/people/a.md",
1418 "wiki-page",
1419 "Person A",
1420 "Knows [[records/contacts/b]].",
1421 );
1422 fx.write("records/contacts/b.md", "contact", "Contact B summary", "");
1423 let slice = neighborhood(
1424 &fx.store,
1425 &fx.p("wiki/people/a.md"),
1426 1,
1427 &[],
1428 Direction::Outgoing,
1429 )
1430 .unwrap();
1431 assert_eq!(slice.nodes.len(), 1);
1432 let n = &slice.nodes[0];
1433 assert_eq!(n.path, fx.p("records/contacts/b"));
1434 assert_eq!(n.summary, "Contact B summary");
1435 assert_eq!(n.type_.as_deref(), Some("contact"));
1436 assert_eq!(n.hops, 1);
1437 assert_eq!(n.via, Some((fx.p("wiki/people/a"), Direction::Outgoing)));
1438 }
1439
1440 #[test]
1441 fn neighborhood_incoming_only_walks_backlinks() {
1442 let fx = Fixture::new();
1443 // a -> seed (incoming to seed). seed -> c (outgoing from seed).
1444 fx.write(
1445 "wiki/people/seed.md",
1446 "wiki-page",
1447 "Seed",
1448 "Out to [[wiki/people/c]].",
1449 );
1450 fx.write(
1451 "wiki/people/a.md",
1452 "wiki-page",
1453 "A",
1454 "In to [[wiki/people/seed]].",
1455 );
1456 fx.write("wiki/people/c.md", "wiki-page", "C", "");
1457 fx.reindex();
1458 let slice = neighborhood(
1459 &fx.store,
1460 &fx.p("wiki/people/seed.md"),
1461 1,
1462 &[],
1463 Direction::Incoming,
1464 )
1465 .unwrap();
1466 // Incoming direction: only `a` (which links TO seed), not `c`.
1467 assert_eq!(
1468 paths(
1469 &slice
1470 .nodes
1471 .iter()
1472 .map(|n| n.path.clone())
1473 .collect::<Vec<_>>()
1474 ),
1475 vec!["wiki/people/a"]
1476 );
1477 assert_eq!(
1478 slice.nodes[0].via,
1479 Some((fx.p("wiki/people/seed"), Direction::Incoming))
1480 );
1481 }
1482
1483 #[test]
1484 fn neighborhood_bounded_bfs_respects_hop_limit_and_min_distance() {
1485 let fx = Fixture::new();
1486 // Chain a -> b -> c -> d, all outgoing.
1487 fx.write("wiki/c/a.md", "wiki-page", "A", "[[wiki/c/b]]");
1488 fx.write("wiki/c/b.md", "wiki-page", "B", "[[wiki/c/c]]");
1489 fx.write("wiki/c/c.md", "wiki-page", "C", "[[wiki/c/d]]");
1490 fx.write("wiki/c/d.md", "wiki-page", "D", "");
1491 let slice =
1492 neighborhood(&fx.store, &fx.p("wiki/c/a.md"), 2, &[], Direction::Outgoing).unwrap();
1493 // 2 hops reaches b (1) and c (2), not d (3).
1494 let by_path: HashMap<String, u32> = slice
1495 .nodes
1496 .iter()
1497 .map(|n| (n.path.to_string_lossy().to_string(), n.hops))
1498 .collect();
1499 assert_eq!(by_path.get("wiki/c/b").copied(), Some(1));
1500 assert_eq!(by_path.get("wiki/c/c").copied(), Some(2));
1501 assert_eq!(by_path.get("wiki/c/d"), None);
1502 assert_eq!(slice.nodes.len(), 2);
1503 }
1504
1505 #[test]
1506 fn neighborhood_records_min_hops_on_diamond() {
1507 let fx = Fixture::new();
1508 // Diamond: a -> b, a -> c, b -> d, c -> d. d is reachable at hop 2 from
1509 // either branch; it must be recorded once, at hop 2.
1510 fx.write("wiki/d/a.md", "wiki-page", "A", "[[wiki/d/b]] [[wiki/d/c]]");
1511 fx.write("wiki/d/b.md", "wiki-page", "B", "[[wiki/d/d]]");
1512 fx.write("wiki/d/c.md", "wiki-page", "C", "[[wiki/d/d]]");
1513 fx.write("wiki/d/d.md", "wiki-page", "D", "");
1514 let slice =
1515 neighborhood(&fx.store, &fx.p("wiki/d/a.md"), 3, &[], Direction::Outgoing).unwrap();
1516 let d_nodes: Vec<&ContextNode> = slice
1517 .nodes
1518 .iter()
1519 .filter(|n| n.path == fx.p("wiki/d/d"))
1520 .collect();
1521 assert_eq!(d_nodes.len(), 1, "d must appear exactly once");
1522 assert_eq!(d_nodes[0].hops, 2, "d's min distance from a is 2");
1523 // b and c at hop 1, d at hop 2 => 3 nodes total, no cycle blowup.
1524 assert_eq!(slice.nodes.len(), 3);
1525 }
1526
1527 #[test]
1528 fn neighborhood_type_filter_narrows_results_but_not_traversal() {
1529 let fx = Fixture::new();
1530 // seed -> contact -> meeting. Filtering to `meeting` must still reach
1531 // the meeting THROUGH the (excluded) contact at hop 2.
1532 fx.write(
1533 "wiki/people/seed.md",
1534 "wiki-page",
1535 "Seed",
1536 "[[records/contacts/sarah]]",
1537 );
1538 fx.write(
1539 "records/contacts/sarah.md",
1540 "contact",
1541 "Sarah",
1542 "[[records/meetings/m1]]",
1543 );
1544 fx.write("records/meetings/m1.md", "meeting", "Renewal call", "");
1545 let only_meetings = vec!["meeting".to_string()];
1546 let slice = neighborhood(
1547 &fx.store,
1548 &fx.p("wiki/people/seed.md"),
1549 2,
1550 &only_meetings,
1551 Direction::Outgoing,
1552 )
1553 .unwrap();
1554 // Only the meeting is returned; the contact is traversed but filtered out.
1555 assert_eq!(slice.nodes.len(), 1);
1556 assert_eq!(slice.nodes[0].path, fx.p("records/meetings/m1"));
1557 assert_eq!(slice.nodes[0].type_.as_deref(), Some("meeting"));
1558 assert_eq!(slice.nodes[0].hops, 2);
1559 }
1560
1561 #[test]
1562 fn neighborhood_cycle_terminates() {
1563 let fx = Fixture::new();
1564 // a <-> b cycle. Must not loop forever; each appears once.
1565 fx.write("wiki/g/a.md", "wiki-page", "A", "[[wiki/g/b]]");
1566 fx.write("wiki/g/b.md", "wiki-page", "B", "[[wiki/g/a]]");
1567 fx.reindex();
1568 let slice =
1569 neighborhood(&fx.store, &fx.p("wiki/g/a.md"), 10, &[], Direction::Both).unwrap();
1570 // From a: b is the only other node (a is the seed, excluded).
1571 assert_eq!(
1572 paths(
1573 &slice
1574 .nodes
1575 .iter()
1576 .map(|n| n.path.clone())
1577 .collect::<Vec<_>>()
1578 ),
1579 vec!["wiki/g/b"]
1580 );
1581 }
1582
1583 // ── orphans ──────────────────────────────────────────────────────────────
1584
1585 #[test]
1586 fn orphans_finds_files_with_no_edges_either_direction() {
1587 let fx = Fixture::new();
1588 // Wired pair: a links to b (a has outgoing, b has incoming).
1589 fx.write("wiki/people/a.md", "wiki-page", "A", "[[wiki/people/b]]");
1590 fx.write("wiki/people/b.md", "wiki-page", "B", "");
1591 // Orphan: no links in or out.
1592 fx.write(
1593 "sources/emails/lonely.md",
1594 "email",
1595 "Lonely email",
1596 "Just text, no links.",
1597 );
1598 let got = orphans(&fx.store, None).unwrap();
1599 assert_eq!(paths(&got), vec!["sources/emails/lonely.md"]);
1600 }
1601
1602 #[test]
1603 fn orphans_file_with_only_broken_outgoing_link_is_orphan() {
1604 let fx = Fixture::new();
1605 // Broken targets are validation issues, not graph edges to another
1606 // store file. A file whose only link points nowhere is still an orphan.
1607 fx.write(
1608 "wiki/people/a.md",
1609 "wiki-page",
1610 "A",
1611 "[[records/contacts/ghost]]",
1612 );
1613 let got = orphans(&fx.store, None).unwrap();
1614 assert!(
1615 paths(&got).contains(&"wiki/people/a.md".to_string()),
1616 "broken outgoing links must not wire the graph: {got:?}"
1617 );
1618 }
1619
1620 #[test]
1621 fn orphans_file_with_only_incoming_is_not_orphan() {
1622 let fx = Fixture::new();
1623 // `target` has no outgoing links but IS linked to by `linker` — not an orphan.
1624 fx.write("records/contacts/target.md", "contact", "Target", "");
1625 fx.write(
1626 "wiki/people/linker.md",
1627 "wiki-page",
1628 "Linker",
1629 "[[records/contacts/target]]",
1630 );
1631 let got = orphans(&fx.store, None).unwrap();
1632 assert!(
1633 !paths(&got).contains(&"records/contacts/target.md".to_string()),
1634 "incoming-only is not an orphan: {got:?}"
1635 );
1636 // `linker` has outgoing, so also not an orphan.
1637 assert!(!paths(&got).contains(&"wiki/people/linker.md".to_string()));
1638 }
1639
1640 #[test]
1641 fn orphans_incoming_link_from_other_layer_unorphans() {
1642 let fx = Fixture::new();
1643 // Candidate in records/, only incoming edge comes from wiki/ — a
1644 // cross-layer link must still un-orphan it even when scoped to records.
1645 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1646 fx.write(
1647 "wiki/people/sarah.md",
1648 "wiki-page",
1649 "bio",
1650 "[[records/contacts/sarah]]",
1651 );
1652 // A genuine orphan in records/ to prove the scope still returns something.
1653 fx.write("records/contacts/nemo.md", "contact", "Nemo", "");
1654 let got = orphans(&fx.store, Some(Layer::Records)).unwrap();
1655 assert_eq!(paths(&got), vec!["records/contacts/nemo.md"]);
1656 }
1657
1658 #[test]
1659 fn orphans_layer_scope_filters_candidates() {
1660 let fx = Fixture::new();
1661 // One orphan per layer.
1662 fx.write("sources/emails/s.md", "email", "S", "no links");
1663 fx.write("records/contacts/r.md", "contact", "R", "");
1664 fx.write("wiki/people/w.md", "wiki-page", "W", "");
1665 let only_wiki = orphans(&fx.store, Some(Layer::Wiki)).unwrap();
1666 assert_eq!(paths(&only_wiki), vec!["wiki/people/w.md"]);
1667 let only_sources = orphans(&fx.store, Some(Layer::Sources)).unwrap();
1668 assert_eq!(paths(&only_sources), vec!["sources/emails/s.md"]);
1669 // No scope: all three, sorted (records, sources, wiki).
1670 let all = orphans(&fx.store, None).unwrap();
1671 assert_eq!(
1672 paths(&all),
1673 vec![
1674 "records/contacts/r.md",
1675 "sources/emails/s.md",
1676 "wiki/people/w.md",
1677 ]
1678 );
1679 }
1680
1681 #[test]
1682 fn orphans_self_link_does_not_count_as_an_edge() {
1683 let fx = Fixture::new();
1684 // A page that only links to itself has no real edges => still an orphan.
1685 fx.write(
1686 "wiki/synthesis/solo.md",
1687 "wiki-page",
1688 "Solo",
1689 "I reference [[wiki/synthesis/solo]] only.",
1690 );
1691 let got = orphans(&fx.store, None).unwrap();
1692 assert_eq!(paths(&got), vec!["wiki/synthesis/solo.md"]);
1693 }
1694
1695 #[test]
1696 fn orphans_excludes_index_and_db_files() {
1697 let fx = Fixture::new();
1698 // A lone index.md / DB.md must never be reported as an orphan content file.
1699 fx.write_raw(
1700 "wiki/index.md",
1701 "---\ntype: index\nscope: layer\nfolder: wiki\n---\n# wiki\n",
1702 );
1703 fx.write(
1704 "wiki/people/real-orphan.md",
1705 "wiki-page",
1706 "Real",
1707 "no links",
1708 );
1709 let got = orphans(&fx.store, None).unwrap();
1710 assert_eq!(paths(&got), vec!["wiki/people/real-orphan.md"]);
1711 }
1712
1713 // ── frontmatter_block helper ─────────────────────────────────────────────
1714
1715 #[test]
1716 fn frontmatter_block_extracts_between_fences() {
1717 let text = "---\ntype: contact\nsummary: hi\n---\nbody here\n";
1718 assert_eq!(
1719 frontmatter_block(text),
1720 Some("type: contact\nsummary: hi\n")
1721 );
1722 }
1723
1724 #[test]
1725 fn frontmatter_block_none_without_leading_fence() {
1726 let text = "no frontmatter here\n";
1727 assert_eq!(frontmatter_block(text), None);
1728 }
1729}