dbmd_core/graph.rs
1//! `graph` — the wiki-link **relationship layer**.
2//!
3//! Wiki-links are curated-relevance edges (the LLM wrote them), so the graph's
4//! job is to **assemble the relevant context around a seed**, not to be
5//! analyzed. **All ops are on-demand — there is no maintained graph** (a
6//! persistent graph is the roadmap engine).
7//!
8//! [`backlinks`] / [`forwardlinks`] are loop ops (O(changed), never O(store)).
9//! [`neighborhood`] is the high-value context-hydration op. [`orphans`] is a
10//! SWEEP curation worklist.
11//!
12//! Whole-graph analytics (connected components, cycle detection, shortest
13//! path, sinks/sources, DOT/JSON export) are deliberately **not** here — a
14//! human studying the graph opens the store in Obsidian; broken-link detection
15//! is [`crate::validate`]'s job (`WIKI_LINK_BROKEN`).
16//!
17//! ## Implementation note — two paths for the incoming-edge scan
18//!
19//! The scale contract (SPEC § Tooling, plan: *"the interactive loop is
20//! O(changed), never O(store)"*) is the load-bearing rule here. [`backlinks`]
21//! is a loop op, so it must **not** open and `read_to_string` every content file
22//! in the store on each call. It resolves incoming edges by one of two paths,
23//! chosen by whether the call is scoped:
24//!
25//! - **Unscoped** (`dbmd graph backlinks <x>`, no `--type`/`--in`): one
26//! embedded-ripgrep pass for the literal `[[<target>]]` over the tree, via
27//! [`Store::find_links_to`] (`grep` + `ignore`, early-exit per file) — the
28//! same scan engine [`crate::validate`]'s working-set incoming-linker step
29//! uses. A single store traversal with cheap presence-only matching, not N
30//! whole-file parses; that is what keeps the unscoped call inside the loop
31//! budget. [`backlinks`] then filters the raw hits to content files and emits
32//! canonical bare targets (its relationship view), where the lower-level
33//! [`Store::find_links_to`] returns every `.md` the text appears in.
34//! - **Scoped** (`--type` / `--in`): the candidate set is enumerated from the
35//! relevant type-folder `index.jsonl` sidecars — one sequential read per
36//! type-folder (via [`crate::query::Query`], which sits on
37//! [`Store::read_type_index`]) — and each candidate is confirmed by a
38//! single-file parse. That is what makes `--type` / `--in` an *I/O* scope, not
39//! just a result filter: a typed/layer-scoped `backlinks` reads only the
40//! relevant folder(s)' sidecars and parses only those files.
41//!
42//! **Why the scoped path confirms by parsing the candidate, not by trusting the
43//! sidecar's `links` field.** A sidecar record's `links` is the file's
44//! *frontmatter* `links:` list only — it does **not** capture wiki-links written
45//! in the body or inside other typed frontmatter fields (`company: [[…]]`,
46//! `attendees: [ … ]`, `derived_from: [ … ]`). [`forwardlinks`] extracts edges
47//! from the whole file, so to keep the two directions on the **same** edge set
48//! (an incoming edge to X is exactly: some file whose [`forwardlinks`] contains
49//! X) the incoming-edge confirmation re-parses each candidate file the same way.
50//! The sidecar bounds *which* files are candidates; the parse decides whether
51//! each truly links. The unscoped ripgrep path stays on that same edge set by
52//! matching the link text wherever it lives in the file (frontmatter or body).
53//! A node's `summary` / `type` likewise read frontmatter directly (the source of
54//! truth the sidecar is derived from; never stale).
55
56use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
57use std::io;
58use std::path::{Path, PathBuf};
59
60use ignore::WalkBuilder;
61use regex::Regex;
62
63use crate::index::IndexRecord;
64use crate::query::Query;
65use crate::store::{Layer, Store, StoreError};
66
67/// Which edge directions a traversal follows.
68#[derive(Debug, Clone, Copy, PartialEq, Eq)]
69pub enum Direction {
70 /// Incoming edges only (backlinks).
71 Incoming,
72 /// Outgoing edges only (forwardlinks).
73 Outgoing,
74 /// Both directions.
75 Both,
76}
77
78/// One node reached during a [`neighborhood`] hydration: the file, its
79/// `summary`, and how it connects back toward the seed.
80#[derive(Debug, Clone, PartialEq, Eq)]
81pub struct ContextNode {
82 /// The store-relative path of the reached file.
83 pub path: PathBuf,
84 /// The file's `summary` (read from its sidecar entry / frontmatter).
85 pub summary: String,
86 /// The file's `type`, when known.
87 pub type_: Option<String>,
88 /// Hop distance from the seed (the seed itself is 0).
89 pub hops: u32,
90 /// The relationship edge that brought this node into the slice: the path it
91 /// links to/from one hop closer to the seed, and the direction.
92 pub via: Option<(PathBuf, Direction)>,
93}
94
95/// The readable working-set digest [`neighborhood`] returns: the seed plus the
96/// reached nodes with their summaries and connections. The relationship-axis
97/// "turn a seed into context" primitive.
98#[derive(Debug, Clone, PartialEq, Eq)]
99pub struct ContextSlice {
100 /// The seed the slice was hydrated from.
101 pub seed: PathBuf,
102 /// The reached nodes (excluding the seed), in BFS order.
103 pub nodes: Vec<ContextNode>,
104}
105
106/// Incoming edges to `path`: files that wiki-link to it. The blast-radius /
107/// dependents primitive before an edit. Store-wide (every layer / every type);
108/// see [`backlinks_filtered`] for the `--type` / `--in`-scoped form.
109///
110/// `path` is the store-relative target as it would be written inside a
111/// wiki-link (with or without a trailing `.md`; both resolve to the same
112/// target). Returns each linking file as its **canonical bare wiki-link path**
113/// (store-relative, no `.md`) — the same key [`forwardlinks`] emits, so the two
114/// directions round-trip and [`neighborhood`] can use one node identity.
115/// Deduped, sorted, never including the seed itself.
116pub fn backlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
117 backlinks_filtered(store, path, &[], None)
118}
119
120/// Incoming edges to `path`, scoped by the linking file's `type` and/or layer —
121/// the `dbmd graph backlinks --type/--in` surface.
122///
123/// **Scale (the loop contract).** Two paths, by whether the call is scoped:
124///
125/// - **Unscoped** (`types` empty *and* `layer` `None`): one embedded-ripgrep
126/// pass for `[[<target>]]` across the store via [`Store::find_links_to`] — a
127/// single `grep` + `ignore` traversal with early-exit per file, never a
128/// `read_to_string` of every content file. This is the same scan engine
129/// [`crate::validate::validate_working_set`]'s incoming-linker step rides, and
130/// it keeps the unscoped call inside the loop budget (the old per-candidate
131/// confirm-read re-opened every file in the store → O(store)).
132/// - **Scoped** (`types` and/or `layer` set): the candidate set — the files that
133/// *might* link to `path` — is read from the relevant type-folder
134/// `index.jsonl` sidecars, so the call touches only the named folder(s):
135/// O(folder), the sanctioned loop cost. Each candidate is then confirmed by a
136/// single-file parse. When `types` lists several types, every named type's
137/// folder is read and the candidate sets unioned; a `layer` further restricts
138/// the candidate paths to that layer.
139///
140/// **Correctness (one edge set, both paths).** An incoming edge to X is exactly:
141/// some file whose [`forwardlinks`] contains X — a wiki-link in the body or in
142/// *any* frontmatter field (`company: [[…]]`, `attendees: [ … ]`), not just the
143/// sidecar's frontmatter `links:` projection. Both paths honor that:
144/// - The unscoped scan matches the literal `[[<target>]]` text wherever it lives
145/// in a file (frontmatter or body), the same edges [`forwardlinks`] extracts.
146/// [`Store::find_links_to`] returns *every* `.md` carrying the link text
147/// (including `index.md` catalogs); [`backlinks`] is the relationship view, so
148/// the results are filtered to content files ([`is_content_rel`]) and emitted
149/// as canonical bare targets, self-excluded.
150/// - The scoped path confirms each candidate via [`file_links_to`], which
151/// delegates to [`forwardlinks`] (body + every frontmatter field) — so a
152/// body-only or typed-field edge is caught, not just the sidecar's `links:`
153/// list.
154///
155/// Result form (canonical bare paths, deduped, sorted, seed excluded) is
156/// identical on both paths and matches [`backlinks`].
157pub fn backlinks_filtered(
158 store: &Store,
159 path: &Path,
160 types: &[String],
161 layer: Option<Layer>,
162) -> Result<Vec<PathBuf>, StoreError> {
163 let target = normalize_target(path);
164 if target.is_empty() {
165 return Ok(Vec::new());
166 }
167
168 // Unscoped: one embedded-ripgrep pass over the store (O(store) scan with
169 // early-exit per file), not a per-candidate read of every content file.
170 // `find_links_to` returns every `.md` carrying the link text (incl. catalog
171 // `index.md`); narrow to content files and canonicalize to the bare target
172 // form `backlinks` emits, dropping the seed's self-link.
173 if types.is_empty() && layer.is_none() {
174 let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
175 for rel in store.find_links_to(path)? {
176 if !is_content_rel(&rel) {
177 continue;
178 }
179 let linker = normalize_target(&rel);
180 if linker.is_empty() || linker == target {
181 // A file never counts as its own backlink.
182 continue;
183 }
184 hits.insert(PathBuf::from(linker));
185 }
186 return Ok(hits.into_iter().collect());
187 }
188
189 // Scoped: read only the named folder(s)' sidecars for the candidate set, then
190 // confirm each candidate with a single-file parse — O(folder), the I/O scope
191 // `--type` / `--in` buys.
192 let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
193 for candidate in candidate_records(store, types, layer)? {
194 let rel = &candidate.path;
195 let candidate_target = normalize_target(rel);
196 if candidate_target.is_empty() || candidate_target == target {
197 // A file never counts as its own backlink.
198 continue;
199 }
200 // Confirm the edge by parsing the candidate file the same way
201 // forwardlinks does (body + all frontmatter), so body/typed-field links
202 // are caught — the sidecar's `links` field alone would miss them.
203 if file_links_to(store, rel, &target)? {
204 hits.insert(PathBuf::from(candidate_target));
205 }
206 }
207
208 Ok(hits.into_iter().collect())
209}
210
211/// Outgoing edges from `path`: the wiki-link targets extracted from that single
212/// file. Loop-fast; follow the evidence chain.
213///
214/// `path` is the store-relative path of the file to read. Targets are returned
215/// as store-relative paths (bare, no `.md`), deduped and sorted; the file's
216/// links to itself are dropped. A missing file yields an empty list (a
217/// dangling seed has no outgoing edges to report — broken-link detection is
218/// [`crate::validate`]'s job).
219pub fn forwardlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
220 let self_target = normalize_target(path);
221 let abs = match resolve_existing(store, path) {
222 Some(a) => a,
223 None => return Ok(Vec::new()),
224 };
225 let body = match std::fs::read_to_string(&abs) {
226 Ok(b) => b,
227 // A file that isn't valid UTF-8 (e.g. a binary source) carries no
228 // wiki-links we can extract.
229 Err(e) if e.kind() == io::ErrorKind::InvalidData => return Ok(Vec::new()),
230 Err(e) => return Err(StoreError::Io(e)),
231 };
232
233 let mut out: BTreeSet<PathBuf> = BTreeSet::new();
234 for target in extract_link_targets(&body) {
235 if target.is_empty() || target == self_target {
236 continue;
237 }
238 out.insert(PathBuf::from(target));
239 }
240 Ok(out.into_iter().collect())
241}
242
243/// The candidate set for an incoming-edge scan: the sidecar records that could
244/// link to the target, read from the type-folder `index.jsonl` sidecars (never
245/// a content-tree walk). `types`/`layer` narrow *which* sidecars are read — the
246/// I/O scope that keeps a typed/layer backlinks O(folder).
247///
248/// - `types` non-empty: read each type's folder sidecar (via [`Query`], which
249/// sits on [`Store::read_type_index`]), optionally layer-scoped, and union the
250/// records by path (a file appears once even if two type reads surface it).
251/// - `types` empty: every sidecar record under `layer` (or store-wide when
252/// `None`) via [`Store::sidecar_records`].
253fn candidate_records(
254 store: &Store,
255 types: &[String],
256 layer: Option<Layer>,
257) -> Result<Vec<IndexRecord>, StoreError> {
258 if types.is_empty() {
259 return store.sidecar_records(layer);
260 }
261 let mut by_path: std::collections::BTreeMap<PathBuf, IndexRecord> =
262 std::collections::BTreeMap::new();
263 for type_ in types {
264 let mut q = Query::new().with_type(type_);
265 if let Some(layer) = layer {
266 q = q.with_layer(layer);
267 }
268 for rec in q.execute(store)? {
269 by_path.insert(rec.path.clone(), rec);
270 }
271 }
272 Ok(by_path.into_values().collect())
273}
274
275/// True if the store file at `rel` carries a wiki-link whose canonical target
276/// equals `target`. Delegates to [`forwardlinks`] so the incoming-edge predicate
277/// is *exactly* the outgoing-edge extraction — body + every frontmatter field —
278/// keeping the two directions on one edge set. `forwardlinks` already emits
279/// canonical bare targets, so `target` (likewise normalized by the caller) is
280/// compared directly. A missing/binary file links to nothing.
281fn file_links_to(store: &Store, rel: &Path, target: &str) -> Result<bool, StoreError> {
282 let edges = forwardlinks(store, rel)?;
283 Ok(edges.iter().any(|e| e.as_os_str() == target))
284}
285
286/// **Context hydration.** Bounded BFS from `seed` over backlinks + forwardlinks
287/// out to `hops`, reading each reached file's `summary` + relationship, and
288/// returning a readable [`ContextSlice`]. Optionally filtered by `types` and
289/// `direction`. On-demand; no maintained graph. What the agent reaches for to
290/// assemble a working set in one call.
291///
292/// Traversal semantics:
293/// - **`hops`** bounds true graph distance from the seed. `hops == 0` returns
294/// an empty slice (the seed alone is no context).
295/// - **`direction`** selects which edges are followed: `Incoming` walks
296/// backlinks, `Outgoing` walks forwardlinks, `Both` walks the union.
297/// - **`types`**, when non-empty, filters which reached nodes appear in the
298/// slice — but traversal still passes *through* off-type nodes, so a
299/// `meeting` two hops out is still reachable through a `contact` even when
300/// filtering to `meeting`. (An empty `types` slice imposes no filter.)
301/// - Each node records the lowest hop count at which it is first reached (BFS
302/// order); the seed is never included as a node.
303pub fn neighborhood(
304 store: &Store,
305 seed: &Path,
306 hops: u32,
307 types: &[String],
308 direction: Direction,
309) -> Result<ContextSlice, StoreError> {
310 let seed_rel = PathBuf::from(normalize_target(seed));
311 let type_filter: HashSet<&str> = types.iter().map(|s| s.as_str()).collect();
312
313 // `discovered` guards against revisiting a node (and against re-adding the
314 // seed). BFS by levels so the first time we reach a node is its true min
315 // hop distance.
316 let mut discovered: HashSet<PathBuf> = HashSet::new();
317 discovered.insert(seed_rel.clone());
318
319 let mut nodes: Vec<ContextNode> = Vec::new();
320 let mut frontier: VecDeque<PathBuf> = VecDeque::new();
321 frontier.push_back(seed_rel.clone());
322
323 let mut hop = 0u32;
324 while hop < hops && !frontier.is_empty() {
325 hop += 1;
326 let level_size = frontier.len();
327 for _ in 0..level_size {
328 let current = frontier.pop_front().expect("frontier non-empty");
329
330 // Collect this node's edges in the requested direction(s). Each
331 // edge carries the neighbor path + the direction we traversed it.
332 let mut edges: Vec<(PathBuf, Direction)> = Vec::new();
333 if matches!(direction, Direction::Outgoing | Direction::Both) {
334 for nbr in forwardlinks(store, ¤t)? {
335 edges.push((nbr, Direction::Outgoing));
336 }
337 }
338 if matches!(direction, Direction::Incoming | Direction::Both) {
339 for nbr in backlinks(store, ¤t)? {
340 edges.push((nbr, Direction::Incoming));
341 }
342 }
343
344 for (neighbor, dir) in edges {
345 if !discovered.insert(neighbor.clone()) {
346 continue;
347 }
348 let (summary, type_) = read_summary_and_type(store, &neighbor);
349 let include = type_filter.is_empty()
350 || type_
351 .as_deref()
352 .map(|t| type_filter.contains(t))
353 .unwrap_or(false);
354 if include {
355 nodes.push(ContextNode {
356 path: neighbor.clone(),
357 summary,
358 type_,
359 hops: hop,
360 via: Some((current.clone(), dir)),
361 });
362 }
363 // Off-type nodes are not emitted but still seed the next BFS
364 // level, so the type filter narrows the *result*, not the
365 // reachable graph.
366 frontier.push_back(neighbor);
367 }
368 }
369 }
370
371 Ok(ContextSlice {
372 seed: seed_rel,
373 nodes,
374 })
375}
376
377/// **SWEEP.** Content files with no incoming AND no outgoing wiki-links — the
378/// curation worklist ("ingested but not yet wired into the wiki"). Off the
379/// loop. Optionally scoped to a layer.
380///
381/// A file is an orphan iff it neither links out to another store file nor is
382/// linked to by one. Incoming edges are counted across the *whole* store
383/// (a link from any layer un-orphans a file), even when `layer` scopes the
384/// candidate set. Returns store-relative paths, sorted.
385pub fn orphans(store: &Store, layer: Option<Layer>) -> Result<Vec<PathBuf>, StoreError> {
386 // One walk of the whole store: for every content file, record (a) whether
387 // it has any outgoing link, and (b) accumulate the set of every target any
388 // file links to (its incoming-edge set). Both come from a single read per
389 // file — the SWEEP cost.
390 let all = walk_content_files(store)?;
391
392 let mut linked_to: HashSet<PathBuf> = HashSet::new();
393 let mut has_outgoing: HashMap<PathBuf, bool> = HashMap::new();
394
395 for abs in &all {
396 let rel = match rel_path(store, abs) {
397 Some(r) => r,
398 None => continue,
399 };
400 let self_target = normalize_target(&rel);
401
402 let body = match std::fs::read_to_string(abs) {
403 Ok(b) => b,
404 Err(e) if e.kind() == io::ErrorKind::InvalidData => String::new(),
405 Err(e) => return Err(StoreError::Io(e)),
406 };
407
408 let mut outgoing = false;
409 for target in extract_link_targets(&body) {
410 if target.is_empty() || target == self_target {
411 continue;
412 }
413 outgoing = true;
414 linked_to.insert(PathBuf::from(target));
415 }
416 has_outgoing.insert(rel, outgoing);
417 }
418
419 let mut out: BTreeSet<PathBuf> = BTreeSet::new();
420 for abs in &all {
421 let rel = match rel_path(store, abs) {
422 Some(r) => r,
423 None => continue,
424 };
425 if let Some(layer) = layer {
426 if path_layer(&rel) != Some(layer) {
427 continue;
428 }
429 }
430 let outgoing = has_outgoing.get(&rel).copied().unwrap_or(false);
431 let incoming = linked_to.contains(&PathBuf::from(normalize_target(&rel)));
432 if !outgoing && !incoming {
433 out.insert(rel);
434 }
435 }
436
437 Ok(out.into_iter().collect())
438}
439
440/// **Write-side.** Rewrite every incoming `[[old]]` wiki-link in `text` to
441/// `[[new]]`, preserving any `|display` override and emitting the canonical bare
442/// target (no `.md`). The write-side twin of [`backlinks`]: where `backlinks`
443/// *finds* the files carrying an edge to `old`, this *retargets* that edge to
444/// `new` inside one file's contents.
445///
446/// `old` and `new` are store-relative paths in the wiki-link sense — both are
447/// passed through the same [`normalize_target`] the read side keys on, so the
448/// `.md` and bare spellings of `old` collapse to one target and a match here is
449/// exactly a match [`backlinks`] / [`Store::find_links_to`](crate::Store::find_links_to)
450/// would report. A link is rewritten iff its normalized target equals
451/// `normalize_target(old)`; prefix collisions (`old=a/b` vs `[[a/bc]]`) and
452/// short-form links never match. Returns the rewritten text (identical to the
453/// input when nothing matched), so the caller can cheaply detect a no-op.
454///
455/// Operates on the raw bytes (not a parser round-trip) so a link in frontmatter
456/// or body is retargeted uniformly and nothing else is reflowed.
457pub fn rewrite_links_to(text: &str, old: &Path, new: &Path) -> String {
458 let old_target = normalize_target(old);
459 let new_target = normalize_target(new);
460 if old_target.is_empty() {
461 // No target to match → never rewrite anything.
462 return text.to_string();
463 }
464
465 let re = rewrite_link_re();
466 let mut out = String::with_capacity(text.len());
467 let mut last = 0usize;
468 for caps in re.captures_iter(text) {
469 let whole = caps.get(0).expect("group 0 always present");
470 let raw_target = caps.get(1).map(|m| m.as_str()).unwrap_or("");
471 // Match on the SAME normalized key the read side uses, so `[[old]]`,
472 // `[[old.md]]`, and `[[./old]]` all retarget and a prefix like
473 // `[[old-jr]]` never does.
474 if normalize_target(Path::new(raw_target)) != old_target {
475 continue;
476 }
477 // Copy the gap since the previous rewrite verbatim, then the rebuilt
478 // link: canonical bare new target + the original display, if any.
479 out.push_str(&text[last..whole.start()]);
480 out.push_str("[[");
481 out.push_str(&new_target);
482 if let Some(display) = caps.get(2) {
483 out.push('|');
484 out.push_str(display.as_str());
485 }
486 out.push_str("]]");
487 last = whole.end();
488 }
489 out.push_str(&text[last..]);
490 out
491}
492
493// ── Private helpers ─────────────────────────────────────────────────────────
494
495/// The wiki-link regex used by the **write side** ([`rewrite_links_to`]):
496/// captures the target (group 1) AND the optional `|display` (group 2) so a
497/// rewrite can re-emit the display verbatim. The target/display character
498/// classes match [`wiki_link_re`] exactly, so the write side recognizes a link
499/// iff the read side does — the two never disagree on what a `[[…]]` is.
500fn rewrite_link_re() -> Regex {
501 Regex::new(r"\[\[([^\]\|\n]+?)(?:\|([^\]\n]*))?\]\]")
502 .expect("static wiki-link rewrite regex compiles")
503}
504
505/// Normalize a store-relative path into the canonical wiki-link target form:
506/// forward slashes, no leading `./` or `/`, and no trailing `.md`. This is the
507/// single key that incoming/outgoing edges are compared on, so the `.md` and
508/// bare forms of a target unify.
509fn normalize_target(path: &Path) -> String {
510 let mut s = path.to_string_lossy().replace('\\', "/");
511 while let Some(rest) = s.strip_prefix("./") {
512 s = rest.to_string();
513 }
514 let s = s.trim_start_matches('/');
515 let s = s.strip_suffix(".md").unwrap_or(s);
516 s.trim().to_string()
517}
518
519/// The wiki-link regex: `[[target]]` / `[[target|display]]`. Captures the raw
520/// target (group 1). Compiled once per call site; cheap.
521fn wiki_link_re() -> Regex {
522 // target = anything up to the first `|` or `]`. display (optional) is
523 // discarded. Matches across a single line/body slice.
524 Regex::new(r"\[\[([^\]\|\n]+?)(?:\|[^\]\n]*)?\]\]").expect("static wiki-link regex compiles")
525}
526
527/// Extract every wiki-link target from a body, normalized to the canonical
528/// store-relative form. Order-preserving; duplicates kept (callers dedup).
529fn extract_link_targets(body: &str) -> Vec<String> {
530 let re = wiki_link_re();
531 re.captures_iter(body)
532 .filter_map(|c| c.get(1))
533 .map(|m| normalize_target(Path::new(m.as_str().trim())))
534 .filter(|t| !t.is_empty())
535 .collect()
536}
537
538/// Resolve the store root + a store-relative path to the absolute on-disk file,
539/// trying the path as written and then with a `.md` extension. `None` if
540/// neither exists.
541fn resolve_existing(store: &Store, store_relative: &Path) -> Option<PathBuf> {
542 let direct = store.root.join(store_relative);
543 if direct.is_file() {
544 return Some(direct);
545 }
546 let normalized = normalize_target(store_relative);
547 let with_md = store.root.join(format!("{normalized}.md"));
548 if with_md.is_file() {
549 return Some(with_md);
550 }
551 None
552}
553
554/// Convert an absolute path under the store root into its store-relative form.
555fn rel_path(store: &Store, abs: &Path) -> Option<PathBuf> {
556 abs.strip_prefix(&store.root).ok().map(|p| p.to_path_buf())
557}
558
559/// Which layer a store-relative path sits in, by its first component.
560fn path_layer(rel: &Path) -> Option<Layer> {
561 let first = rel.components().next()?;
562 match first.as_os_str().to_str()? {
563 "sources" => Some(Layer::Sources),
564 "records" => Some(Layer::Records),
565 "wiki" => Some(Layer::Wiki),
566 _ => None,
567 }
568}
569
570/// True if a store-relative path is a *content* file: under `sources/`,
571/// `records/`, or `wiki/`, a `.md` file, and not an `index.md`. Meta files
572/// (`DB.md`, `log.md`, `log/…`, sidecars) are excluded.
573fn is_content_rel(rel: &Path) -> bool {
574 if path_layer(rel).is_none() {
575 return false;
576 }
577 match rel.extension().and_then(|e| e.to_str()) {
578 Some("md") => {}
579 _ => return false,
580 }
581 rel.file_name().and_then(|n| n.to_str()) != Some("index.md")
582}
583
584/// Walk every content `.md` file in the store via the **`ignore`** walker
585/// (the ripgrep directory engine). Only the three layer roots
586/// (`sources/`/`records/`/`wiki/`) are descended, so `DB.md`, `log.md`, and
587/// `log/` at the store root are structurally never reached; hidden dirs and
588/// per-folder `index.md` sidecars are filtered out ([`is_content_rel`]). Honors
589/// `.gitignore` the way `rg` does. Returns absolute paths. SWEEP-class.
590fn walk_content_files(store: &Store) -> Result<Vec<PathBuf>, StoreError> {
591 let mut out = Vec::new();
592 for layer in Layer::all() {
593 let dir = store.root.join(layer_dir_name(layer));
594 if !dir.is_dir() {
595 continue;
596 }
597 let walker = WalkBuilder::new(&dir)
598 .hidden(true)
599 .git_ignore(true)
600 .git_global(false)
601 .require_git(false)
602 .build();
603 for result in walker {
604 let entry = result.map_err(|e| StoreError::Search {
605 root: store.root.clone(),
606 message: format!("walk failed: {e}"),
607 })?;
608 if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
609 continue;
610 }
611 let abs = entry.into_path();
612 if let Some(rel) = rel_path(store, &abs) {
613 if is_content_rel(&rel) {
614 out.push(abs);
615 }
616 }
617 }
618 }
619 Ok(out)
620}
621
622/// The on-disk folder name for a layer. Mirrors `Layer::dir_name`; kept local
623/// so the graph module owns its own copy rather than coupling to that body.
624fn layer_dir_name(layer: Layer) -> &'static str {
625 match layer {
626 Layer::Sources => "sources",
627 Layer::Records => "records",
628 Layer::Wiki => "wiki",
629 }
630}
631
632/// Read a reached node's `summary` and `type` from its frontmatter. A missing
633/// file, missing frontmatter, or unparseable YAML degrades to an empty summary
634/// / unknown type rather than failing the whole hydration — `neighborhood` is
635/// best-effort context assembly, not validation.
636fn read_summary_and_type(store: &Store, rel: &Path) -> (String, Option<String>) {
637 let abs = match resolve_existing(store, rel) {
638 Some(a) => a,
639 None => return (String::new(), None),
640 };
641 let text = match std::fs::read_to_string(&abs) {
642 Ok(t) => t,
643 Err(_) => return (String::new(), None),
644 };
645 let yaml = match frontmatter_block(&text) {
646 Some(y) => y,
647 None => return (String::new(), None),
648 };
649 let value: serde_yml::Value = match serde_yml::from_str(yaml) {
650 Ok(v) => v,
651 Err(_) => return (String::new(), None),
652 };
653 let summary = value
654 .get("summary")
655 .and_then(|v| v.as_str())
656 .unwrap_or("")
657 .to_string();
658 let type_ = value
659 .get("type")
660 .and_then(|v| v.as_str())
661 .map(|s| s.to_string());
662 (summary, type_)
663}
664
665/// Return the YAML between the opening and closing `---` fences (exclusive), or
666/// `None` if the text has no leading frontmatter block. Local mirror of the
667/// parser's split so the graph module stays self-contained.
668fn frontmatter_block(text: &str) -> Option<&str> {
669 let rest = text
670 .strip_prefix("---\n")
671 .or_else(|| text.strip_prefix("---\r\n"))?;
672 // Find the closing fence: a line that is exactly `---`.
673 let mut idx = 0usize;
674 for line in rest.split_inclusive('\n') {
675 let trimmed = line.trim_end_matches(['\r', '\n']);
676 if trimmed == "---" {
677 return Some(&rest[..idx]);
678 }
679 idx += line.len();
680 }
681 None
682}
683
684#[cfg(test)]
685mod tests {
686 use super::*;
687 use std::fs;
688 use tempfile::TempDir;
689
690 use crate::parser::Config;
691
692 // ── Fixture builder ─────────────────────────────────────────────────────
693 //
694 // A real on-disk store in a tempdir. We write actual files (frontmatter +
695 // wiki-links) and exercise the real code paths. The fixture constructs the
696 // `Store` by its public fields rather than `Store::open`, so the graph
697 // tests stand on their own and do not depend on any other module's
698 // behavior. Each test asserts the behavior the SPEC promises, derived from
699 // intent, never from echoing the function's own output.
700 //
701 // `backlinks` (and `neighborhood` in any incoming direction) enumerate their
702 // candidate set from the type-folder `index.jsonl` sidecars — the loop
703 // contract: never a whole-store content walk. A real db.md store maintains
704 // those sidecars write-through, so a test that exercises backlinks must call
705 // [`Fixture::reindex`] after writing its files to build them (the SWEEP that
706 // `dbmd index rebuild` runs). Forwardlinks/orphans read content directly and
707 // need no sidecar.
708
709 struct Fixture {
710 _tmp: TempDir,
711 store: Store,
712 }
713
714 impl Fixture {
715 fn new() -> Self {
716 let tmp = TempDir::new().expect("tempdir");
717 let root = tmp.path().to_path_buf();
718 fs::write(root.join("DB.md"), "---\ntype: db-md\n---\n# store\n").expect("DB.md");
719 let store = Store {
720 root,
721 config: Config::default(),
722 };
723 Fixture { _tmp: tmp, store }
724 }
725
726 /// Write a content file at a store-relative path with the given type,
727 /// summary, and body. Creates parent dirs.
728 fn write(&self, rel: &str, type_: &str, summary: &str, body: &str) {
729 let abs = self.store.root.join(rel);
730 fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
731 let contents = format!(
732 "---\ntype: {type_}\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: {summary}\n---\n{body}\n"
733 );
734 fs::write(&abs, contents).expect("write file");
735 }
736
737 /// Write a raw file verbatim (for frontmatter-shape edge cases).
738 fn write_raw(&self, rel: &str, contents: &str) {
739 let abs = self.store.root.join(rel);
740 fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
741 fs::write(&abs, contents).expect("write raw");
742 }
743
744 /// Build the type-folder `index.jsonl` sidecars from the content written
745 /// so far — the state a real store is always in (write-through), and the
746 /// candidate set `backlinks` reads. Call after writing files in any test
747 /// that exercises `backlinks` or an incoming-direction `neighborhood`.
748 fn reindex(&self) {
749 crate::index::Index::rebuild_all(&self.store).expect("rebuild sidecars");
750 }
751
752 fn p(&self, rel: &str) -> PathBuf {
753 PathBuf::from(rel)
754 }
755 }
756
757 fn paths(v: &[PathBuf]) -> Vec<String> {
758 v.iter()
759 .map(|p| p.to_string_lossy().replace('\\', "/"))
760 .collect()
761 }
762
763 // ── normalize_target ────────────────────────────────────────────────────
764
765 #[test]
766 fn normalize_strips_md_and_leading_dotslash() {
767 assert_eq!(
768 normalize_target(Path::new("records/contacts/sarah.md")),
769 "records/contacts/sarah"
770 );
771 assert_eq!(
772 normalize_target(Path::new("./wiki/people/elena")),
773 "wiki/people/elena"
774 );
775 assert_eq!(normalize_target(Path::new("/records/x")), "records/x");
776 // Bare and `.md` forms must collapse to the same key, or edges won't unify.
777 assert_eq!(
778 normalize_target(Path::new("a/b")),
779 normalize_target(Path::new("a/b.md"))
780 );
781 }
782
783 // ── extract_link_targets (forwardlinks core) ────────────────────────────
784
785 #[test]
786 fn extract_handles_display_text_and_md_suffix() {
787 let body = "See [[wiki/people/sarah-chen|Sarah]] and [[records/contacts/elena.md]].";
788 let got = extract_link_targets(body);
789 assert_eq!(
790 got,
791 vec!["wiki/people/sarah-chen", "records/contacts/elena"]
792 );
793 }
794
795 #[test]
796 fn extract_ignores_external_markdown_links() {
797 // Standard markdown links are NOT wiki-links and must not be extracted
798 // (SPEC: external refs don't participate in the graph).
799 let body = "[Acme](https://acme.io) but [[records/companies/acme]] is internal.";
800 let got = extract_link_targets(body);
801 assert_eq!(got, vec!["records/companies/acme"]);
802 }
803
804 #[test]
805 fn extract_display_text_is_not_treated_as_a_target() {
806 // A `|display` segment that looks path-like must not become a target;
807 // only the part before `|` is the link target.
808 let body = "[[records/contacts/sarah|sources/emails/decoy]]";
809 let got = extract_link_targets(body);
810 assert_eq!(got, vec!["records/contacts/sarah"]);
811 }
812
813 // ── rewrite_links_to (write-side twin of backlinks) ─────────────────────
814
815 #[test]
816 fn rewrite_plain_link_to_canonical_new_target() {
817 let got = rewrite_links_to(
818 "See [[records/contacts/sarah-chen]] today.",
819 Path::new("records/contacts/sarah-chen"),
820 Path::new("records/contacts/sarah-chen-acme"),
821 );
822 assert_eq!(got, "See [[records/contacts/sarah-chen-acme]] today.");
823 }
824
825 #[test]
826 fn rewrite_preserves_display_override() {
827 let got = rewrite_links_to(
828 "With [[records/contacts/sarah-chen|Sarah]].",
829 Path::new("records/contacts/sarah-chen"),
830 Path::new("records/contacts/sarah-chen-acme"),
831 );
832 assert_eq!(got, "With [[records/contacts/sarah-chen-acme|Sarah]].");
833 }
834
835 #[test]
836 fn rewrite_matches_md_suffixed_old_and_emits_bare_new() {
837 // The `.md` spelling of the old target must match (it normalizes to the
838 // same key the read side uses), and the new target is emitted bare —
839 // the writer doctrine validate enforces (`WIKI_LINK_HAS_EXTENSION`).
840 let got = rewrite_links_to(
841 "[[records/contacts/sarah-chen.md]]",
842 Path::new("records/contacts/sarah-chen"),
843 Path::new("records/contacts/new.md"),
844 );
845 assert_eq!(got, "[[records/contacts/new]]");
846 }
847
848 #[test]
849 fn rewrite_leaves_prefix_collisions_and_short_form_untouched() {
850 // Boundary correctness, anchored to the SAME normalize_target the read
851 // side keys on: `records/contacts/sarah-chen` must NOT match the longer
852 // `[[…-jr]]`, the short-form `[[sarah-chen]]`, or an unrelated target.
853 let input = "[[records/contacts/sarah-chen-jr]] [[sarah-chen]] [[wiki/topics/x]]";
854 let got = rewrite_links_to(
855 input,
856 Path::new("records/contacts/sarah-chen"),
857 Path::new("records/contacts/new"),
858 );
859 assert_eq!(got, input, "no genuine edge to the seed → text unchanged");
860 }
861
862 #[test]
863 fn rewrite_handles_multiple_occurrences_and_mixed_spellings() {
864 let got = rewrite_links_to(
865 "[[records/x]] then [[./records/x]] and [[records/x.md|d]] end",
866 Path::new("records/x"),
867 Path::new("records/y"),
868 );
869 // All three spellings of the same target retarget; the display survives.
870 assert_eq!(
871 got,
872 "[[records/y]] then [[records/y]] and [[records/y|d]] end"
873 );
874 }
875
876 #[test]
877 fn rewrite_retargets_exactly_the_edges_the_core_parser_sees() {
878 // The load-bearing property of moving the rewrite into core: the write
879 // side must operate on EXACTLY the edge set the read side recognizes —
880 // the same `extract_link_targets` / `normalize_target` grammar that
881 // `forwardlinks` is built on. Anchor the test to that grammar (via
882 // `forwardlinks` on a real file) rather than re-listing literals, so a
883 // future divergence between the read parser and the write rewrite fails
884 // here. (Coupled to `forwardlinks` — the single-file edge extractor —
885 // not the multi-file `backlinks` traversal, so it tests the grammar, not
886 // the walk.)
887 let fx = Fixture::new();
888 let body = "Met [[records/contacts/sarah.md|Sarah]] and not [[records/contacts/sarah-2]].";
889 fx.write("wiki/people/bio.md", "wiki-page", "bio", body);
890
891 // Read side: the parser sees two outgoing edges, both in canonical bare
892 // form (the `.md` spelling collapsed). `sarah` is a real edge here.
893 let edges = forwardlinks(&fx.store, &fx.p("wiki/people/bio.md")).unwrap();
894 assert_eq!(
895 paths(&edges),
896 vec!["records/contacts/sarah", "records/contacts/sarah-2"],
897 "fixture must contain exactly the two edges this test reasons about"
898 );
899
900 // Write side: rewriting `sarah → sarah-chen` must retarget the edge the
901 // parser recognized (matching the `.md` spelling), preserve the display,
902 // and leave the unrelated `sarah-2` edge untouched.
903 let got = rewrite_links_to(
904 body,
905 Path::new("records/contacts/sarah"),
906 Path::new("records/contacts/sarah-chen"),
907 );
908 assert_eq!(
909 got,
910 "Met [[records/contacts/sarah-chen|Sarah]] and not [[records/contacts/sarah-2]]."
911 );
912
913 // Cross-check through the parser: the rewritten text's edge set is the
914 // original with `sarah` swapped for `sarah-chen` — proving the rewrite
915 // moved exactly one edge, the one the read side keyed on.
916 fx.write("wiki/people/bio.md", "wiki-page", "bio", &got);
917 let after = forwardlinks(&fx.store, &fx.p("wiki/people/bio.md")).unwrap();
918 assert_eq!(
919 paths(&after),
920 vec!["records/contacts/sarah-2", "records/contacts/sarah-chen"],
921 "after rewrite the parser must see the new target and not the old"
922 );
923 }
924
925 #[test]
926 fn rewrite_empty_old_target_is_a_no_op() {
927 // A degenerate `old` (normalizes to empty) must never rewrite anything,
928 // mirroring backlinks' empty-target guard.
929 let input = "[[records/x]] [[]] text";
930 let got = rewrite_links_to(input, Path::new(""), Path::new("records/y"));
931 assert_eq!(got, input);
932 }
933
934 #[test]
935 fn rewrite_no_match_returns_input_unchanged() {
936 let input = "no links, [external](https://x), and [[wiki/topics/y]]";
937 let got = rewrite_links_to(input, Path::new("records/x"), Path::new("records/z"));
938 assert_eq!(got, input);
939 }
940
941 // ── forwardlinks ─────────────────────────────────────────────────────────
942
943 #[test]
944 fn forwardlinks_returns_sorted_deduped_targets_excluding_self() {
945 let fx = Fixture::new();
946 fx.write(
947 "wiki/projects/renewal.md",
948 "wiki-page",
949 "Renewal project",
950 "Links: [[records/contacts/sarah]] [[records/companies/acme]] [[records/contacts/sarah]] and itself [[wiki/projects/renewal]].",
951 );
952 // The targets need not exist on disk for forwardlinks (it reads the one
953 // file only). Self-links are dropped; duplicates collapse; sorted asc.
954 let got = forwardlinks(&fx.store, &fx.p("wiki/projects/renewal.md")).unwrap();
955 assert_eq!(
956 paths(&got),
957 vec!["records/companies/acme", "records/contacts/sarah"]
958 );
959 }
960
961 #[test]
962 fn forwardlinks_picks_up_wiki_links_in_frontmatter() {
963 // SPEC: wiki-links appear in scalar + block-sequence frontmatter fields,
964 // not just the body. forwardlinks must follow those edges too.
965 let fx = Fixture::new();
966 fx.write_raw(
967 "records/meetings/m1.md",
968 "---\ntype: meeting\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Renewal sync\ncompany: [[records/companies/acme]]\nattendees:\n - [[records/contacts/sarah]]\n - [[records/contacts/elena]]\n---\nNotes about [[wiki/projects/renewal]].\n",
969 );
970 let got = forwardlinks(&fx.store, &fx.p("records/meetings/m1.md")).unwrap();
971 assert_eq!(
972 paths(&got),
973 vec![
974 "records/companies/acme",
975 "records/contacts/elena",
976 "records/contacts/sarah",
977 "wiki/projects/renewal",
978 ]
979 );
980 }
981
982 #[test]
983 fn forwardlinks_missing_file_is_empty_not_error() {
984 let fx = Fixture::new();
985 let got = forwardlinks(&fx.store, &fx.p("wiki/people/ghost.md")).unwrap();
986 assert!(got.is_empty());
987 }
988
989 #[test]
990 fn forwardlinks_resolves_seed_given_without_md_extension() {
991 let fx = Fixture::new();
992 fx.write(
993 "wiki/people/sarah.md",
994 "wiki-page",
995 "Sarah bio",
996 "Works at [[records/companies/acme]].",
997 );
998 // Seed passed in bare wiki-link form (no `.md`) must still resolve.
999 let got = forwardlinks(&fx.store, &fx.p("wiki/people/sarah")).unwrap();
1000 assert_eq!(paths(&got), vec!["records/companies/acme"]);
1001 }
1002
1003 // ── backlinks ──────────────────────────────────────────────────────────
1004
1005 #[test]
1006 fn backlinks_finds_incoming_across_layers_and_link_forms() {
1007 let fx = Fixture::new();
1008 // Target.
1009 fx.write("records/contacts/sarah.md", "contact", "Sarah Chen", "");
1010 // Three different incoming-link spellings, all to the same target.
1011 fx.write(
1012 "wiki/people/sarah.md",
1013 "wiki-page",
1014 "bio",
1015 "See [[records/contacts/sarah]].",
1016 );
1017 fx.write(
1018 "records/meetings/m1.md",
1019 "meeting",
1020 "Renewal call",
1021 "Attendee [[records/contacts/sarah|Sarah]].",
1022 );
1023 fx.write(
1024 "sources/emails/e1.md",
1025 "email",
1026 "Hi",
1027 "From [[records/contacts/sarah.md]] today.",
1028 );
1029 // A file that links to a DIFFERENT contact must not be a backlink.
1030 fx.write(
1031 "wiki/people/other.md",
1032 "wiki-page",
1033 "x",
1034 "[[records/contacts/sarah-2]]",
1035 );
1036 fx.reindex();
1037
1038 // All three link forms ([[x]], [[x|d]], [[x.md]]) resolve to the same
1039 // target and are found; the linkers are returned in canonical bare form.
1040 let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1041 assert_eq!(
1042 paths(&got),
1043 vec![
1044 "records/meetings/m1",
1045 "sources/emails/e1",
1046 "wiki/people/sarah",
1047 ]
1048 );
1049 }
1050
1051 #[test]
1052 fn backlinks_and_forwardlinks_round_trip_on_same_key() {
1053 // If A forwardlinks to B, then B backlinks to A — both expressed in the
1054 // identical bare key, so neighborhood can dedup across directions.
1055 let fx = Fixture::new();
1056 fx.write(
1057 "wiki/people/a.md",
1058 "wiki-page",
1059 "A",
1060 "Knows [[wiki/people/b]].",
1061 );
1062 fx.write("wiki/people/b.md", "wiki-page", "B", "");
1063 fx.reindex();
1064 let fwd = forwardlinks(&fx.store, &fx.p("wiki/people/a.md")).unwrap();
1065 let back = backlinks(&fx.store, &fx.p("wiki/people/b.md")).unwrap();
1066 assert_eq!(paths(&fwd), vec!["wiki/people/b"]);
1067 assert_eq!(paths(&back), vec!["wiki/people/a"]);
1068 }
1069
1070 #[test]
1071 fn backlinks_does_not_match_path_prefix_collisions() {
1072 let fx = Fixture::new();
1073 fx.write("records/contacts/sam.md", "contact", "Sam", "");
1074 // `sam-smith` shares the `sam` prefix; must NOT count as a backlink to `sam`.
1075 fx.write(
1076 "wiki/people/x.md",
1077 "wiki-page",
1078 "x",
1079 "[[records/contacts/sam-smith]]",
1080 );
1081 // The genuine backlink.
1082 fx.write(
1083 "wiki/people/y.md",
1084 "wiki-page",
1085 "y",
1086 "[[records/contacts/sam]]",
1087 );
1088 fx.reindex();
1089
1090 let got = backlinks(&fx.store, &fx.p("records/contacts/sam")).unwrap();
1091 assert_eq!(paths(&got), vec!["wiki/people/y"]);
1092 }
1093
1094 #[test]
1095 fn backlinks_excludes_self_reference() {
1096 let fx = Fixture::new();
1097 // A page that links to itself is not its own backlink.
1098 fx.write(
1099 "wiki/synthesis/overview.md",
1100 "wiki-page",
1101 "Overview",
1102 "This page [[wiki/synthesis/overview]] references itself.",
1103 );
1104 fx.reindex();
1105 let got = backlinks(&fx.store, &fx.p("wiki/synthesis/overview.md")).unwrap();
1106 assert!(
1107 got.is_empty(),
1108 "self-link must not appear as a backlink, got {got:?}"
1109 );
1110 }
1111
1112 #[test]
1113 fn backlinks_empty_when_nobody_links() {
1114 let fx = Fixture::new();
1115 fx.write("records/contacts/lonely.md", "contact", "Lonely", "");
1116 fx.write(
1117 "wiki/people/unrelated.md",
1118 "wiki-page",
1119 "x",
1120 "[[records/companies/acme]]",
1121 );
1122 fx.reindex();
1123 let got = backlinks(&fx.store, &fx.p("records/contacts/lonely.md")).unwrap();
1124 assert!(got.is_empty());
1125 }
1126
1127 #[test]
1128 fn backlinks_ignores_index_and_meta_files() {
1129 let fx = Fixture::new();
1130 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1131 // An index.md that lists the target must NOT be reported as a backlink
1132 // (indexes are catalog, not relationship edges).
1133 fx.write_raw(
1134 "records/contacts/index.md",
1135 "---\ntype: index\nscope: folder\nfolder: records/contacts\n---\n- [[records/contacts/sarah]] — Sarah\n",
1136 );
1137 fx.reindex();
1138 let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1139 assert!(got.is_empty(), "index.md must be excluded, got {got:?}");
1140 }
1141
1142 #[test]
1143 fn backlinks_finds_body_only_edge_not_in_frontmatter_links_field() {
1144 // REGRESSION: the sidecar's `links` field carries only the file's
1145 // frontmatter `links:` list; it does NOT include wiki-links written in
1146 // the body or in other typed frontmatter fields. Answering backlinks
1147 // from `links[]` alone would silently miss this edge. The candidate set
1148 // is sidecar-bounded, but each candidate's edge is confirmed by parsing
1149 // the file (the same extraction forwardlinks uses), so a body-only link
1150 // must still register as a backlink.
1151 let fx = Fixture::new();
1152 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1153 // `meeting.md` links to sarah ONLY in its body — its frontmatter has no
1154 // `links:` field at all, so the sidecar record's `links` is empty.
1155 fx.write(
1156 "records/meetings/standup.md",
1157 "meeting",
1158 "Standup",
1159 "Discussed renewal with [[records/contacts/sarah]].",
1160 );
1161 fx.reindex();
1162
1163 // Guard the premise: the sidecar record really does carry an empty
1164 // `links` (so this test fails loudly if the index ever starts extracting
1165 // body links — at which point the backlink predicate could be revisited).
1166 let rec = fx
1167 .store
1168 .find_by_type("meeting")
1169 .unwrap()
1170 .into_iter()
1171 .find(|r| r.path == fx.p("records/meetings/standup.md"))
1172 .expect("meeting is catalogued in its sidecar");
1173 assert!(
1174 rec.links.is_empty(),
1175 "premise: the body link is NOT projected into the sidecar `links` field; got {:?}",
1176 rec.links
1177 );
1178
1179 // Yet backlinks still finds it — because it confirms via the file parse,
1180 // not via the sidecar `links` field.
1181 let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1182 assert_eq!(
1183 paths(&got),
1184 vec!["records/meetings/standup"],
1185 "a body-only wiki-link must register as a backlink"
1186 );
1187 }
1188
1189 #[test]
1190 fn backlinks_finds_edge_in_typed_frontmatter_field() {
1191 // A wiki-link inside a *typed* frontmatter field (`company:`) is a real
1192 // edge forwardlinks follows, so backlinks must find it too — even though
1193 // the sidecar's `links` field (the `links:` key only) does not list it.
1194 let fx = Fixture::new();
1195 fx.write("records/companies/acme.md", "company", "Acme", "");
1196 fx.write_raw(
1197 "records/contacts/sarah.md",
1198 "---\ntype: contact\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Sarah\ncompany: [[records/companies/acme]]\n---\nBody with no links.\n",
1199 );
1200 fx.reindex();
1201 let got = backlinks(&fx.store, &fx.p("records/companies/acme.md")).unwrap();
1202 assert_eq!(
1203 paths(&got),
1204 vec!["records/contacts/sarah"],
1205 "a wiki-link in a typed frontmatter field is an incoming edge"
1206 );
1207 }
1208
1209 #[test]
1210 fn backlinks_unscoped_scans_the_tree_not_only_the_sidecar() {
1211 // REGRESSION (loop budget): an UNSCOPED `backlinks` must resolve incoming
1212 // edges with a SINGLE embedded-ripgrep pass over the tree
1213 // (`Store::find_links_to`), NOT by reading the sidecar candidate set and
1214 // then `read_to_string`-confirming each candidate (which re-opens every
1215 // content file → O(store); the documented >3x budget miss). A ripgrep
1216 // pass is the same scan engine `validate`/`rename`/`dbmd links` ride, and
1217 // the tree — not the sidecar — is its ground truth: a linker that is on
1218 // disk but absent from every sidecar (stale / never-built index) is still
1219 // found. We assert that behaviorally, which fails loudly if the unscoped
1220 // path ever reverts to the sidecar-bounded per-candidate confirm loop
1221 // (that loop would NOT find the unindexed linker).
1222 let fx = Fixture::new();
1223 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1224 fx.write(
1225 "wiki/people/indexed.md",
1226 "wiki-page",
1227 "Indexed",
1228 "[[records/contacts/sarah]]",
1229 );
1230 fx.reindex(); // builds sidecars for sarah + the indexed linker
1231
1232 // Now drop a NEW linker on disk WITHOUT reindexing — it is on disk but in
1233 // no sidecar.
1234 fx.write(
1235 "wiki/people/unindexed.md",
1236 "wiki-page",
1237 "Unindexed",
1238 "[[records/contacts/sarah]]",
1239 );
1240
1241 let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1242 assert_eq!(
1243 paths(&got),
1244 vec!["wiki/people/indexed", "wiki/people/unindexed"],
1245 "unscoped backlinks ripgrep-scans the tree, so the on-disk-but-unindexed \
1246 linker is found too — not only the sidecar-catalogued one"
1247 );
1248 }
1249
1250 #[test]
1251 fn backlinks_scoped_candidates_come_from_the_sidecar_not_a_tree_walk() {
1252 // REGRESSION (scale contract): the SCOPED form (`--type` / `--in`) is the
1253 // I/O-scoped path — it enumerates candidates from the relevant type-folder
1254 // `index.jsonl` sidecars and parses only those, NOT a whole-tree walk.
1255 // That is what makes the scope an I/O scope, not just a result filter:
1256 // a linker that is on disk but ABSENT from the sidecar (stale / never-built
1257 // index) is NOT discovered by the scoped call (the sidecar bounds which
1258 // files are candidates). This is the loop-vs-walk distinction the SPEC
1259 // draws, and it is exactly the inverse of the unscoped tree scan above.
1260 let fx = Fixture::new();
1261 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1262 fx.write(
1263 "wiki/people/indexed.md",
1264 "wiki-page",
1265 "Indexed",
1266 "[[records/contacts/sarah]]",
1267 );
1268 fx.reindex(); // builds sidecars for sarah + the indexed linker
1269
1270 // Drop a NEW wiki-page linker on disk WITHOUT reindexing — on disk, in no
1271 // sidecar.
1272 fx.write(
1273 "wiki/people/unindexed.md",
1274 "wiki-page",
1275 "Unindexed",
1276 "[[records/contacts/sarah]]",
1277 );
1278
1279 // Scoped to the `wiki-page` type: the candidate set is the sidecar's, so
1280 // only the catalogued linker is found — the unindexed one is invisible.
1281 let only_wiki_pages = vec!["wiki-page".to_string()];
1282 let got = backlinks_filtered(
1283 &fx.store,
1284 &fx.p("records/contacts/sarah.md"),
1285 &only_wiki_pages,
1286 None,
1287 )
1288 .unwrap();
1289 assert_eq!(
1290 paths(&got),
1291 vec!["wiki/people/indexed"],
1292 "scoped backlinks reads the sidecar candidate set; the on-disk-but-unindexed \
1293 linker is not tree-walked"
1294 );
1295 }
1296
1297 #[test]
1298 fn backlinks_filtered_type_scopes_the_candidate_set() {
1299 // `--type` narrows backlinks to linkers of that type. Two files link to
1300 // the target — one `meeting`, one `wiki-page`; filtering to `meeting`
1301 // returns only the meeting.
1302 let fx = Fixture::new();
1303 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1304 fx.write(
1305 "records/meetings/m1.md",
1306 "meeting",
1307 "Call",
1308 "[[records/contacts/sarah]]",
1309 );
1310 fx.write(
1311 "wiki/people/bio.md",
1312 "wiki-page",
1313 "Bio",
1314 "[[records/contacts/sarah]]",
1315 );
1316 fx.reindex();
1317
1318 let only_meetings = vec!["meeting".to_string()];
1319 let got = backlinks_filtered(
1320 &fx.store,
1321 &fx.p("records/contacts/sarah.md"),
1322 &only_meetings,
1323 None,
1324 )
1325 .unwrap();
1326 assert_eq!(
1327 paths(&got),
1328 vec!["records/meetings/m1"],
1329 "--type meeting must exclude the wiki-page linker"
1330 );
1331
1332 // Unfiltered, both come back — proving the filter (not the data) dropped one.
1333 let all = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1334 assert_eq!(paths(&all), vec!["records/meetings/m1", "wiki/people/bio"]);
1335 }
1336
1337 #[test]
1338 fn backlinks_filtered_layer_scopes_the_candidate_set() {
1339 // `--in <layer>` narrows backlinks to linkers under that layer.
1340 let fx = Fixture::new();
1341 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1342 fx.write(
1343 "records/meetings/m1.md",
1344 "meeting",
1345 "Call",
1346 "[[records/contacts/sarah]]",
1347 );
1348 fx.write(
1349 "wiki/people/bio.md",
1350 "wiki-page",
1351 "Bio",
1352 "[[records/contacts/sarah]]",
1353 );
1354 fx.reindex();
1355
1356 let got = backlinks_filtered(
1357 &fx.store,
1358 &fx.p("records/contacts/sarah.md"),
1359 &[],
1360 Some(Layer::Wiki),
1361 )
1362 .unwrap();
1363 assert_eq!(
1364 paths(&got),
1365 vec!["wiki/people/bio"],
1366 "--in wiki must keep only the wiki-layer linker"
1367 );
1368
1369 let records_only = backlinks_filtered(
1370 &fx.store,
1371 &fx.p("records/contacts/sarah.md"),
1372 &[],
1373 Some(Layer::Records),
1374 )
1375 .unwrap();
1376 assert_eq!(paths(&records_only), vec!["records/meetings/m1"]);
1377 }
1378
1379 // ── neighborhood ─────────────────────────────────────────────────────────
1380
1381 #[test]
1382 fn neighborhood_hops_zero_is_empty() {
1383 let fx = Fixture::new();
1384 fx.write("wiki/people/a.md", "wiki-page", "A", "[[wiki/people/b]]");
1385 fx.write("wiki/people/b.md", "wiki-page", "B", "");
1386 let slice = neighborhood(
1387 &fx.store,
1388 &fx.p("wiki/people/a.md"),
1389 0,
1390 &[],
1391 Direction::Both,
1392 )
1393 .unwrap();
1394 assert_eq!(slice.seed, fx.p("wiki/people/a"));
1395 assert!(slice.nodes.is_empty());
1396 }
1397
1398 #[test]
1399 fn neighborhood_outgoing_one_hop_reads_summary_and_type() {
1400 let fx = Fixture::new();
1401 fx.write(
1402 "wiki/people/a.md",
1403 "wiki-page",
1404 "Person A",
1405 "Knows [[records/contacts/b]].",
1406 );
1407 fx.write("records/contacts/b.md", "contact", "Contact B summary", "");
1408 let slice = neighborhood(
1409 &fx.store,
1410 &fx.p("wiki/people/a.md"),
1411 1,
1412 &[],
1413 Direction::Outgoing,
1414 )
1415 .unwrap();
1416 assert_eq!(slice.nodes.len(), 1);
1417 let n = &slice.nodes[0];
1418 assert_eq!(n.path, fx.p("records/contacts/b"));
1419 assert_eq!(n.summary, "Contact B summary");
1420 assert_eq!(n.type_.as_deref(), Some("contact"));
1421 assert_eq!(n.hops, 1);
1422 assert_eq!(n.via, Some((fx.p("wiki/people/a"), Direction::Outgoing)));
1423 }
1424
1425 #[test]
1426 fn neighborhood_incoming_only_walks_backlinks() {
1427 let fx = Fixture::new();
1428 // a -> seed (incoming to seed). seed -> c (outgoing from seed).
1429 fx.write(
1430 "wiki/people/seed.md",
1431 "wiki-page",
1432 "Seed",
1433 "Out to [[wiki/people/c]].",
1434 );
1435 fx.write(
1436 "wiki/people/a.md",
1437 "wiki-page",
1438 "A",
1439 "In to [[wiki/people/seed]].",
1440 );
1441 fx.write("wiki/people/c.md", "wiki-page", "C", "");
1442 fx.reindex();
1443 let slice = neighborhood(
1444 &fx.store,
1445 &fx.p("wiki/people/seed.md"),
1446 1,
1447 &[],
1448 Direction::Incoming,
1449 )
1450 .unwrap();
1451 // Incoming direction: only `a` (which links TO seed), not `c`.
1452 assert_eq!(
1453 paths(
1454 &slice
1455 .nodes
1456 .iter()
1457 .map(|n| n.path.clone())
1458 .collect::<Vec<_>>()
1459 ),
1460 vec!["wiki/people/a"]
1461 );
1462 assert_eq!(
1463 slice.nodes[0].via,
1464 Some((fx.p("wiki/people/seed"), Direction::Incoming))
1465 );
1466 }
1467
1468 #[test]
1469 fn neighborhood_bounded_bfs_respects_hop_limit_and_min_distance() {
1470 let fx = Fixture::new();
1471 // Chain a -> b -> c -> d, all outgoing.
1472 fx.write("wiki/c/a.md", "wiki-page", "A", "[[wiki/c/b]]");
1473 fx.write("wiki/c/b.md", "wiki-page", "B", "[[wiki/c/c]]");
1474 fx.write("wiki/c/c.md", "wiki-page", "C", "[[wiki/c/d]]");
1475 fx.write("wiki/c/d.md", "wiki-page", "D", "");
1476 let slice =
1477 neighborhood(&fx.store, &fx.p("wiki/c/a.md"), 2, &[], Direction::Outgoing).unwrap();
1478 // 2 hops reaches b (1) and c (2), not d (3).
1479 let by_path: HashMap<String, u32> = slice
1480 .nodes
1481 .iter()
1482 .map(|n| (n.path.to_string_lossy().to_string(), n.hops))
1483 .collect();
1484 assert_eq!(by_path.get("wiki/c/b").copied(), Some(1));
1485 assert_eq!(by_path.get("wiki/c/c").copied(), Some(2));
1486 assert_eq!(by_path.get("wiki/c/d"), None);
1487 assert_eq!(slice.nodes.len(), 2);
1488 }
1489
1490 #[test]
1491 fn neighborhood_records_min_hops_on_diamond() {
1492 let fx = Fixture::new();
1493 // Diamond: a -> b, a -> c, b -> d, c -> d. d is reachable at hop 2 from
1494 // either branch; it must be recorded once, at hop 2.
1495 fx.write("wiki/d/a.md", "wiki-page", "A", "[[wiki/d/b]] [[wiki/d/c]]");
1496 fx.write("wiki/d/b.md", "wiki-page", "B", "[[wiki/d/d]]");
1497 fx.write("wiki/d/c.md", "wiki-page", "C", "[[wiki/d/d]]");
1498 fx.write("wiki/d/d.md", "wiki-page", "D", "");
1499 let slice =
1500 neighborhood(&fx.store, &fx.p("wiki/d/a.md"), 3, &[], Direction::Outgoing).unwrap();
1501 let d_nodes: Vec<&ContextNode> = slice
1502 .nodes
1503 .iter()
1504 .filter(|n| n.path == fx.p("wiki/d/d"))
1505 .collect();
1506 assert_eq!(d_nodes.len(), 1, "d must appear exactly once");
1507 assert_eq!(d_nodes[0].hops, 2, "d's min distance from a is 2");
1508 // b and c at hop 1, d at hop 2 => 3 nodes total, no cycle blowup.
1509 assert_eq!(slice.nodes.len(), 3);
1510 }
1511
1512 #[test]
1513 fn neighborhood_type_filter_narrows_results_but_not_traversal() {
1514 let fx = Fixture::new();
1515 // seed -> contact -> meeting. Filtering to `meeting` must still reach
1516 // the meeting THROUGH the (excluded) contact at hop 2.
1517 fx.write(
1518 "wiki/people/seed.md",
1519 "wiki-page",
1520 "Seed",
1521 "[[records/contacts/sarah]]",
1522 );
1523 fx.write(
1524 "records/contacts/sarah.md",
1525 "contact",
1526 "Sarah",
1527 "[[records/meetings/m1]]",
1528 );
1529 fx.write("records/meetings/m1.md", "meeting", "Renewal call", "");
1530 let only_meetings = vec!["meeting".to_string()];
1531 let slice = neighborhood(
1532 &fx.store,
1533 &fx.p("wiki/people/seed.md"),
1534 2,
1535 &only_meetings,
1536 Direction::Outgoing,
1537 )
1538 .unwrap();
1539 // Only the meeting is returned; the contact is traversed but filtered out.
1540 assert_eq!(slice.nodes.len(), 1);
1541 assert_eq!(slice.nodes[0].path, fx.p("records/meetings/m1"));
1542 assert_eq!(slice.nodes[0].type_.as_deref(), Some("meeting"));
1543 assert_eq!(slice.nodes[0].hops, 2);
1544 }
1545
1546 #[test]
1547 fn neighborhood_cycle_terminates() {
1548 let fx = Fixture::new();
1549 // a <-> b cycle. Must not loop forever; each appears once.
1550 fx.write("wiki/g/a.md", "wiki-page", "A", "[[wiki/g/b]]");
1551 fx.write("wiki/g/b.md", "wiki-page", "B", "[[wiki/g/a]]");
1552 fx.reindex();
1553 let slice =
1554 neighborhood(&fx.store, &fx.p("wiki/g/a.md"), 10, &[], Direction::Both).unwrap();
1555 // From a: b is the only other node (a is the seed, excluded).
1556 assert_eq!(
1557 paths(
1558 &slice
1559 .nodes
1560 .iter()
1561 .map(|n| n.path.clone())
1562 .collect::<Vec<_>>()
1563 ),
1564 vec!["wiki/g/b"]
1565 );
1566 }
1567
1568 // ── orphans ──────────────────────────────────────────────────────────────
1569
1570 #[test]
1571 fn orphans_finds_files_with_no_edges_either_direction() {
1572 let fx = Fixture::new();
1573 // Wired pair: a links to b (a has outgoing, b has incoming).
1574 fx.write("wiki/people/a.md", "wiki-page", "A", "[[wiki/people/b]]");
1575 fx.write("wiki/people/b.md", "wiki-page", "B", "");
1576 // Orphan: no links in or out.
1577 fx.write(
1578 "sources/emails/lonely.md",
1579 "email",
1580 "Lonely email",
1581 "Just text, no links.",
1582 );
1583 let got = orphans(&fx.store, None).unwrap();
1584 assert_eq!(paths(&got), vec!["sources/emails/lonely.md"]);
1585 }
1586
1587 #[test]
1588 fn orphans_file_with_only_outgoing_is_not_orphan() {
1589 let fx = Fixture::new();
1590 // `a` links out to a target that does NOT exist on disk. `a` still has
1591 // an outgoing edge, so it is not an orphan (orphan = no edges at all).
1592 fx.write(
1593 "wiki/people/a.md",
1594 "wiki-page",
1595 "A",
1596 "[[records/contacts/ghost]]",
1597 );
1598 let got = orphans(&fx.store, None).unwrap();
1599 assert!(
1600 !paths(&got).contains(&"wiki/people/a.md".to_string()),
1601 "outgoing-only is not an orphan: {got:?}"
1602 );
1603 }
1604
1605 #[test]
1606 fn orphans_file_with_only_incoming_is_not_orphan() {
1607 let fx = Fixture::new();
1608 // `target` has no outgoing links but IS linked to by `linker` — not an orphan.
1609 fx.write("records/contacts/target.md", "contact", "Target", "");
1610 fx.write(
1611 "wiki/people/linker.md",
1612 "wiki-page",
1613 "Linker",
1614 "[[records/contacts/target]]",
1615 );
1616 let got = orphans(&fx.store, None).unwrap();
1617 assert!(
1618 !paths(&got).contains(&"records/contacts/target.md".to_string()),
1619 "incoming-only is not an orphan: {got:?}"
1620 );
1621 // `linker` has outgoing, so also not an orphan.
1622 assert!(!paths(&got).contains(&"wiki/people/linker.md".to_string()));
1623 }
1624
1625 #[test]
1626 fn orphans_incoming_link_from_other_layer_unorphans() {
1627 let fx = Fixture::new();
1628 // Candidate in records/, only incoming edge comes from wiki/ — a
1629 // cross-layer link must still un-orphan it even when scoped to records.
1630 fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1631 fx.write(
1632 "wiki/people/sarah.md",
1633 "wiki-page",
1634 "bio",
1635 "[[records/contacts/sarah]]",
1636 );
1637 // A genuine orphan in records/ to prove the scope still returns something.
1638 fx.write("records/contacts/nemo.md", "contact", "Nemo", "");
1639 let got = orphans(&fx.store, Some(Layer::Records)).unwrap();
1640 assert_eq!(paths(&got), vec!["records/contacts/nemo.md"]);
1641 }
1642
1643 #[test]
1644 fn orphans_layer_scope_filters_candidates() {
1645 let fx = Fixture::new();
1646 // One orphan per layer.
1647 fx.write("sources/emails/s.md", "email", "S", "no links");
1648 fx.write("records/contacts/r.md", "contact", "R", "");
1649 fx.write("wiki/people/w.md", "wiki-page", "W", "");
1650 let only_wiki = orphans(&fx.store, Some(Layer::Wiki)).unwrap();
1651 assert_eq!(paths(&only_wiki), vec!["wiki/people/w.md"]);
1652 let only_sources = orphans(&fx.store, Some(Layer::Sources)).unwrap();
1653 assert_eq!(paths(&only_sources), vec!["sources/emails/s.md"]);
1654 // No scope: all three, sorted (records, sources, wiki).
1655 let all = orphans(&fx.store, None).unwrap();
1656 assert_eq!(
1657 paths(&all),
1658 vec![
1659 "records/contacts/r.md",
1660 "sources/emails/s.md",
1661 "wiki/people/w.md",
1662 ]
1663 );
1664 }
1665
1666 #[test]
1667 fn orphans_self_link_does_not_count_as_an_edge() {
1668 let fx = Fixture::new();
1669 // A page that only links to itself has no real edges => still an orphan.
1670 fx.write(
1671 "wiki/synthesis/solo.md",
1672 "wiki-page",
1673 "Solo",
1674 "I reference [[wiki/synthesis/solo]] only.",
1675 );
1676 let got = orphans(&fx.store, None).unwrap();
1677 assert_eq!(paths(&got), vec!["wiki/synthesis/solo.md"]);
1678 }
1679
1680 #[test]
1681 fn orphans_excludes_index_and_db_files() {
1682 let fx = Fixture::new();
1683 // A lone index.md / DB.md must never be reported as an orphan content file.
1684 fx.write_raw(
1685 "wiki/index.md",
1686 "---\ntype: index\nscope: layer\nfolder: wiki\n---\n# wiki\n",
1687 );
1688 fx.write(
1689 "wiki/people/real-orphan.md",
1690 "wiki-page",
1691 "Real",
1692 "no links",
1693 );
1694 let got = orphans(&fx.store, None).unwrap();
1695 assert_eq!(paths(&got), vec!["wiki/people/real-orphan.md"]);
1696 }
1697
1698 // ── frontmatter_block helper ─────────────────────────────────────────────
1699
1700 #[test]
1701 fn frontmatter_block_extracts_between_fences() {
1702 let text = "---\ntype: contact\nsummary: hi\n---\nbody here\n";
1703 assert_eq!(
1704 frontmatter_block(text),
1705 Some("type: contact\nsummary: hi\n")
1706 );
1707 }
1708
1709 #[test]
1710 fn frontmatter_block_none_without_leading_fence() {
1711 let text = "no frontmatter here\n";
1712 assert_eq!(frontmatter_block(text), None);
1713 }
1714}