Skip to main content

dbmd_core/
graph.rs

1//! `graph` — the wiki-link **relationship layer**.
2//!
3//! Wiki-links are curated-relevance edges (the LLM wrote them), so the graph's
4//! job is to **assemble the relevant context around a seed**, not to be
5//! analyzed. **All ops are on-demand — there is no maintained graph** (a
6//! persistent graph is the roadmap engine).
7//!
8//! [`backlinks`] / [`forwardlinks`] are loop ops (O(changed), never O(store)).
9//! [`neighborhood`] is the high-value context-hydration op. [`orphans`] is a
10//! SWEEP curation worklist.
11//!
12//! Whole-graph analytics (connected components, cycle detection, shortest
13//! path, sinks/sources, DOT/JSON export) are deliberately **not** here — a
14//! human studying the graph opens the store in Obsidian; broken-link detection
15//! is [`crate::validate`]'s job (`WIKI_LINK_BROKEN`).
16//!
17//! ## Implementation note — two paths for the incoming-edge scan
18//!
19//! The scale contract (SPEC § Tooling, plan: *"the interactive loop is
20//! O(changed), never O(store)"*) is the load-bearing rule here. [`backlinks`]
21//! is a loop op, so it must **not** open and `read_to_string` every content file
22//! in the store on each call. It resolves incoming edges by one of two paths,
23//! chosen by whether the call is scoped:
24//!
25//! - **Unscoped** (`dbmd graph backlinks <x>`, no `--type`/`--in`): one
26//!   embedded-ripgrep pass for the literal `[[<target>]]` over the tree, via
27//!   [`Store::find_links_to`] (`grep` + `ignore`, early-exit per file) — the
28//!   same scan engine [`crate::validate`]'s working-set incoming-linker step
29//!   uses. A single store traversal with cheap presence-only matching, not N
30//!   whole-file parses; that is what keeps the unscoped call inside the loop
31//!   budget. [`backlinks`] then filters the raw hits to content files and emits
32//!   canonical bare targets (its relationship view), where the lower-level
33//!   [`Store::find_links_to`] returns every `.md` the text appears in.
34//! - **Scoped** (`--type` / `--in`): the candidate set is enumerated from the
35//!   relevant type-folder `index.jsonl` sidecars — one sequential read per
36//!   type-folder (via [`crate::query::Query`], which sits on
37//!   [`Store::read_type_index`]) — and each candidate is confirmed by a
38//!   single-file parse. That is what makes `--type` / `--in` an *I/O* scope, not
39//!   just a result filter: a typed/layer-scoped `backlinks` reads only the
40//!   relevant folder(s)' sidecars and parses only those files.
41//!
42//! **Why the scoped path confirms by parsing the candidate, not by trusting the
43//! sidecar's `links` field.** A sidecar record's `links` is the file's
44//! *frontmatter* `links:` list only — it does **not** capture wiki-links written
45//! in the body or inside other typed frontmatter fields (`company: [[…]]`,
46//! `attendees: [ … ]`, `derived_from: [ … ]`). [`forwardlinks`] extracts edges
47//! from the whole file, so to keep the two directions on the **same** edge set
48//! (an incoming edge to X is exactly: some file whose [`forwardlinks`] contains
49//! X) the incoming-edge confirmation re-parses each candidate file the same way.
50//! The sidecar bounds *which* files are candidates; the parse decides whether
51//! each truly links. The unscoped ripgrep path stays on that same edge set by
52//! matching the link text wherever it lives in the file (frontmatter or body).
53//! A node's `summary` / `type` likewise read frontmatter directly (the source of
54//! truth the sidecar is derived from; never stale).
55
56use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
57use std::io;
58use std::path::{Path, PathBuf};
59
60use ignore::WalkBuilder;
61use regex::Regex;
62
63use crate::index::IndexRecord;
64use crate::query::Query;
65use crate::store::{Layer, Store, StoreError};
66
67/// Which edge directions a traversal follows.
68#[derive(Debug, Clone, Copy, PartialEq, Eq)]
69pub enum Direction {
70    /// Incoming edges only (backlinks).
71    Incoming,
72    /// Outgoing edges only (forwardlinks).
73    Outgoing,
74    /// Both directions.
75    Both,
76}
77
78/// One node reached during a [`neighborhood`] hydration: the file, its
79/// `summary`, and how it connects back toward the seed.
80#[derive(Debug, Clone, PartialEq, Eq)]
81pub struct ContextNode {
82    /// The store-relative path of the reached file.
83    pub path: PathBuf,
84    /// The file's `summary` (read from its sidecar entry / frontmatter).
85    pub summary: String,
86    /// The file's `type`, when known.
87    pub type_: Option<String>,
88    /// Hop distance from the seed (the seed itself is 0).
89    pub hops: u32,
90    /// The relationship edge that brought this node into the slice: the path it
91    /// links to/from one hop closer to the seed, and the direction.
92    pub via: Option<(PathBuf, Direction)>,
93}
94
95/// The readable working-set digest [`neighborhood`] returns: the seed plus the
96/// reached nodes with their summaries and connections. The relationship-axis
97/// "turn a seed into context" primitive.
98#[derive(Debug, Clone, PartialEq, Eq)]
99pub struct ContextSlice {
100    /// The seed the slice was hydrated from.
101    pub seed: PathBuf,
102    /// The reached nodes (excluding the seed), in BFS order.
103    pub nodes: Vec<ContextNode>,
104}
105
106/// Incoming edges to `path`: files that wiki-link to it. The blast-radius /
107/// dependents primitive before an edit. Store-wide (every layer / every type);
108/// see [`backlinks_filtered`] for the `--type` / `--in`-scoped form.
109///
110/// `path` is the store-relative target as it would be written inside a
111/// wiki-link (with or without a trailing `.md`; both resolve to the same
112/// target). Returns each linking file as its **canonical bare wiki-link path**
113/// (store-relative, no `.md`) — the same key [`forwardlinks`] emits, so the two
114/// directions round-trip and [`neighborhood`] can use one node identity.
115/// Deduped, sorted, never including the seed itself.
116pub fn backlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
117    backlinks_filtered(store, path, &[], None)
118}
119
120/// Incoming edges to `path`, scoped by the linking file's `type` and/or layer —
121/// the `dbmd graph backlinks --type/--in` surface.
122///
123/// **Scale (the loop contract).** Two paths, by whether the call is scoped:
124///
125/// - **Unscoped** (`types` empty *and* `layer` `None`): one embedded-ripgrep
126///   pass for `[[<target>]]` across the store via [`Store::find_links_to`] — a
127///   single `grep` + `ignore` traversal with early-exit per file, never a
128///   `read_to_string` of every content file. This is the same scan engine
129///   [`crate::validate::validate_working_set`]'s incoming-linker step rides, and
130///   it keeps the unscoped call inside the loop budget (the old per-candidate
131///   confirm-read re-opened every file in the store → O(store)).
132/// - **Scoped** (`types` and/or `layer` set): the candidate set — the files that
133///   *might* link to `path` — is read from the relevant type-folder
134///   `index.jsonl` sidecars, so the call touches only the named folder(s):
135///   O(folder), the sanctioned loop cost. Each candidate is then confirmed by a
136///   single-file parse. When `types` lists several types, every named type's
137///   folder is read and the candidate sets unioned; a `layer` further restricts
138///   the candidate paths to that layer.
139///
140/// **Correctness (one edge set, both paths).** An incoming edge to X is exactly:
141/// some file whose [`forwardlinks`] contains X — a wiki-link in the body or in
142/// *any* frontmatter field (`company: [[…]]`, `attendees: [ … ]`), not just the
143/// sidecar's frontmatter `links:` projection. Both paths honor that:
144/// - The unscoped scan matches the literal `[[<target>]]` text wherever it lives
145///   in a file (frontmatter or body), the same edges [`forwardlinks`] extracts.
146///   [`Store::find_links_to`] returns *every* `.md` carrying the link text
147///   (including `index.md` catalogs); [`backlinks`] is the relationship view, so
148///   the results are filtered to content files ([`is_content_rel`]) and emitted
149///   as canonical bare targets, self-excluded.
150/// - The scoped path confirms each candidate via [`file_links_to`], which
151///   delegates to [`forwardlinks`] (body + every frontmatter field) — so a
152///   body-only or typed-field edge is caught, not just the sidecar's `links:`
153///   list.
154///
155/// Result form (canonical bare paths, deduped, sorted, seed excluded) is
156/// identical on both paths and matches [`backlinks`].
157pub fn backlinks_filtered(
158    store: &Store,
159    path: &Path,
160    types: &[String],
161    layer: Option<Layer>,
162) -> Result<Vec<PathBuf>, StoreError> {
163    let target = normalize_target(path);
164    if target.is_empty() {
165        return Ok(Vec::new());
166    }
167
168    // Unscoped: one embedded-ripgrep pass over the store (O(store) scan with
169    // early-exit per file), not a per-candidate read of every content file.
170    // `find_links_to` returns every `.md` carrying the link text (incl. catalog
171    // `index.md`); narrow to content files and canonicalize to the bare target
172    // form `backlinks` emits, dropping the seed's self-link.
173    if types.is_empty() && layer.is_none() {
174        let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
175        for rel in store.find_links_to(path)? {
176            if !is_content_rel(&rel) {
177                continue;
178            }
179            let linker = normalize_target(&rel);
180            if linker.is_empty() || linker == target {
181                // A file never counts as its own backlink.
182                continue;
183            }
184            hits.insert(PathBuf::from(linker));
185        }
186        return Ok(hits.into_iter().collect());
187    }
188
189    // Scoped: read only the named folder(s)' sidecars for the candidate set, then
190    // confirm each candidate with a single-file parse — O(folder), the I/O scope
191    // `--type` / `--in` buys.
192    let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
193    for candidate in candidate_records(store, types, layer)? {
194        let rel = &candidate.path;
195        let candidate_target = normalize_target(rel);
196        if candidate_target.is_empty() || candidate_target == target {
197            // A file never counts as its own backlink.
198            continue;
199        }
200        // Confirm the edge by parsing the candidate file the same way
201        // forwardlinks does (body + all frontmatter), so body/typed-field links
202        // are caught — the sidecar's `links` field alone would miss them.
203        if file_links_to(store, rel, &target)? {
204            hits.insert(PathBuf::from(candidate_target));
205        }
206    }
207
208    Ok(hits.into_iter().collect())
209}
210
211/// Outgoing edges from `path`: the wiki-link targets extracted from that single
212/// file. Loop-fast; follow the evidence chain.
213///
214/// `path` is the store-relative path of the file to read. Targets are returned
215/// as store-relative paths (bare, no `.md`), deduped and sorted; the file's
216/// links to itself are dropped. A missing file yields an empty list (a
217/// dangling seed has no outgoing edges to report — broken-link detection is
218/// [`crate::validate`]'s job).
219pub fn forwardlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
220    let self_target = normalize_target(path);
221    let abs = match resolve_existing(store, path) {
222        Some(a) => a,
223        None => return Ok(Vec::new()),
224    };
225    let body = match std::fs::read_to_string(&abs) {
226        Ok(b) => b,
227        // A file that isn't valid UTF-8 (e.g. a binary source) carries no
228        // wiki-links we can extract.
229        Err(e) if e.kind() == io::ErrorKind::InvalidData => return Ok(Vec::new()),
230        Err(e) => return Err(StoreError::Io(e)),
231    };
232
233    let mut out: BTreeSet<PathBuf> = BTreeSet::new();
234    for target in extract_link_targets(&body) {
235        if target.is_empty() || target == self_target {
236            continue;
237        }
238        out.insert(PathBuf::from(target));
239    }
240    Ok(out.into_iter().collect())
241}
242
243/// The candidate set for an incoming-edge scan: the sidecar records that could
244/// link to the target, read from the type-folder `index.jsonl` sidecars (never
245/// a content-tree walk). `types`/`layer` narrow *which* sidecars are read — the
246/// I/O scope that keeps a typed/layer backlinks O(folder).
247///
248/// - `types` non-empty: read each type's folder sidecar (via [`Query`], which
249///   sits on [`Store::read_type_index`]), optionally layer-scoped, and union the
250///   records by path (a file appears once even if two type reads surface it).
251/// - `types` empty: every sidecar record under `layer` (or store-wide when
252///   `None`) via [`Store::sidecar_records`].
253fn candidate_records(
254    store: &Store,
255    types: &[String],
256    layer: Option<Layer>,
257) -> Result<Vec<IndexRecord>, StoreError> {
258    if types.is_empty() {
259        return store.sidecar_records(layer);
260    }
261    let mut by_path: std::collections::BTreeMap<PathBuf, IndexRecord> =
262        std::collections::BTreeMap::new();
263    for type_ in types {
264        let mut q = Query::new().with_type(type_);
265        if let Some(layer) = layer {
266            q = q.with_layer(layer);
267        }
268        for rec in q.execute(store)? {
269            by_path.insert(rec.path.clone(), rec);
270        }
271    }
272    Ok(by_path.into_values().collect())
273}
274
275/// True if the store file at `rel` carries a wiki-link whose canonical target
276/// equals `target`. Delegates to [`forwardlinks`] so the incoming-edge predicate
277/// is *exactly* the outgoing-edge extraction — body + every frontmatter field —
278/// keeping the two directions on one edge set. `forwardlinks` already emits
279/// canonical bare targets, so `target` (likewise normalized by the caller) is
280/// compared directly. A missing/binary file links to nothing.
281fn file_links_to(store: &Store, rel: &Path, target: &str) -> Result<bool, StoreError> {
282    let edges = forwardlinks(store, rel)?;
283    Ok(edges.iter().any(|e| e.as_os_str() == target))
284}
285
286/// **Context hydration.** Bounded BFS from `seed` over backlinks + forwardlinks
287/// out to `hops`, reading each reached file's `summary` + relationship, and
288/// returning a readable [`ContextSlice`]. Optionally filtered by `types` and
289/// `direction`. On-demand; no maintained graph. What the agent reaches for to
290/// assemble a working set in one call.
291///
292/// Traversal semantics:
293/// - **`hops`** bounds true graph distance from the seed. `hops == 0` returns
294///   an empty slice (the seed alone is no context).
295/// - **`direction`** selects which edges are followed: `Incoming` walks
296///   backlinks, `Outgoing` walks forwardlinks, `Both` walks the union.
297/// - **`types`**, when non-empty, filters which reached nodes appear in the
298///   slice — but traversal still passes *through* off-type nodes, so a
299///   `meeting` two hops out is still reachable through a `contact` even when
300///   filtering to `meeting`. (An empty `types` slice imposes no filter.)
301/// - Each node records the lowest hop count at which it is first reached (BFS
302///   order); the seed is never included as a node.
303pub fn neighborhood(
304    store: &Store,
305    seed: &Path,
306    hops: u32,
307    types: &[String],
308    direction: Direction,
309) -> Result<ContextSlice, StoreError> {
310    let seed_rel = PathBuf::from(normalize_target(seed));
311    let type_filter: HashSet<&str> = types.iter().map(|s| s.as_str()).collect();
312
313    // `discovered` guards against revisiting a node (and against re-adding the
314    // seed). BFS by levels so the first time we reach a node is its true min
315    // hop distance.
316    let mut discovered: HashSet<PathBuf> = HashSet::new();
317    discovered.insert(seed_rel.clone());
318
319    let mut nodes: Vec<ContextNode> = Vec::new();
320    let mut frontier: VecDeque<PathBuf> = VecDeque::new();
321    frontier.push_back(seed_rel.clone());
322
323    let mut hop = 0u32;
324    while hop < hops && !frontier.is_empty() {
325        hop += 1;
326        let level_size = frontier.len();
327        for _ in 0..level_size {
328            let current = frontier.pop_front().expect("frontier non-empty");
329
330            // Collect this node's edges in the requested direction(s). Each
331            // edge carries the neighbor path + the direction we traversed it.
332            let mut edges: Vec<(PathBuf, Direction)> = Vec::new();
333            if matches!(direction, Direction::Outgoing | Direction::Both) {
334                for nbr in forwardlinks(store, &current)? {
335                    edges.push((nbr, Direction::Outgoing));
336                }
337            }
338            if matches!(direction, Direction::Incoming | Direction::Both) {
339                for nbr in backlinks(store, &current)? {
340                    edges.push((nbr, Direction::Incoming));
341                }
342            }
343
344            for (neighbor, dir) in edges {
345                if !discovered.insert(neighbor.clone()) {
346                    continue;
347                }
348                let (summary, type_) = read_summary_and_type(store, &neighbor);
349                let include = type_filter.is_empty()
350                    || type_
351                        .as_deref()
352                        .map(|t| type_filter.contains(t))
353                        .unwrap_or(false);
354                if include {
355                    nodes.push(ContextNode {
356                        path: neighbor.clone(),
357                        summary,
358                        type_,
359                        hops: hop,
360                        via: Some((current.clone(), dir)),
361                    });
362                }
363                // Off-type nodes are not emitted but still seed the next BFS
364                // level, so the type filter narrows the *result*, not the
365                // reachable graph.
366                frontier.push_back(neighbor);
367            }
368        }
369    }
370
371    Ok(ContextSlice {
372        seed: seed_rel,
373        nodes,
374    })
375}
376
377/// **SWEEP.** Content files with no incoming AND no outgoing wiki-links — the
378/// curation worklist ("ingested but not yet wired into the wiki"). Off the
379/// loop. Optionally scoped to a layer.
380///
381/// A file is an orphan iff it neither links out to another store file nor is
382/// linked to by one. Incoming edges are counted across the *whole* store
383/// (a link from any layer un-orphans a file), even when `layer` scopes the
384/// candidate set. Returns store-relative paths, sorted.
385pub fn orphans(store: &Store, layer: Option<Layer>) -> Result<Vec<PathBuf>, StoreError> {
386    // One walk of the whole store: for every content file, record (a) whether
387    // it has any outgoing link, and (b) accumulate the set of every target any
388    // file links to (its incoming-edge set). Both come from a single read per
389    // file — the SWEEP cost.
390    let all = walk_content_files(store)?;
391
392    let mut linked_to: HashSet<PathBuf> = HashSet::new();
393    let mut has_outgoing: HashMap<PathBuf, bool> = HashMap::new();
394
395    for abs in &all {
396        let rel = match rel_path(store, abs) {
397            Some(r) => r,
398            None => continue,
399        };
400        let self_target = normalize_target(&rel);
401
402        let body = match std::fs::read_to_string(abs) {
403            Ok(b) => b,
404            Err(e) if e.kind() == io::ErrorKind::InvalidData => String::new(),
405            Err(e) => return Err(StoreError::Io(e)),
406        };
407
408        let mut outgoing = false;
409        for target in extract_link_targets(&body) {
410            if target.is_empty() || target == self_target {
411                continue;
412            }
413            outgoing = true;
414            linked_to.insert(PathBuf::from(target));
415        }
416        has_outgoing.insert(rel, outgoing);
417    }
418
419    let mut out: BTreeSet<PathBuf> = BTreeSet::new();
420    for abs in &all {
421        let rel = match rel_path(store, abs) {
422            Some(r) => r,
423            None => continue,
424        };
425        if let Some(layer) = layer {
426            if path_layer(&rel) != Some(layer) {
427                continue;
428            }
429        }
430        let outgoing = has_outgoing.get(&rel).copied().unwrap_or(false);
431        let incoming = linked_to.contains(&PathBuf::from(normalize_target(&rel)));
432        if !outgoing && !incoming {
433            out.insert(rel);
434        }
435    }
436
437    Ok(out.into_iter().collect())
438}
439
440/// **Write-side.** Rewrite every incoming `[[old]]` wiki-link in `text` to
441/// `[[new]]`, preserving any `|display` override and emitting the canonical bare
442/// target (no `.md`). The write-side twin of [`backlinks`]: where `backlinks`
443/// *finds* the files carrying an edge to `old`, this *retargets* that edge to
444/// `new` inside one file's contents.
445///
446/// `old` and `new` are store-relative paths in the wiki-link sense — both are
447/// passed through the same [`normalize_target`] the read side keys on, so the
448/// `.md` and bare spellings of `old` collapse to one target and a match here is
449/// exactly a match [`backlinks`] / [`Store::find_links_to`](crate::Store::find_links_to)
450/// would report. A link is rewritten iff its normalized target equals
451/// `normalize_target(old)`; prefix collisions (`old=a/b` vs `[[a/bc]]`) and
452/// short-form links never match. Returns the rewritten text (identical to the
453/// input when nothing matched), so the caller can cheaply detect a no-op.
454///
455/// Operates on the raw bytes (not a parser round-trip) so a link in frontmatter
456/// or body is retargeted uniformly and nothing else is reflowed.
457pub fn rewrite_links_to(text: &str, old: &Path, new: &Path) -> String {
458    let old_target = normalize_target(old);
459    let new_target = normalize_target(new);
460    if old_target.is_empty() {
461        // No target to match → never rewrite anything.
462        return text.to_string();
463    }
464
465    let re = rewrite_link_re();
466    let mut out = String::with_capacity(text.len());
467    let mut last = 0usize;
468    for caps in re.captures_iter(text) {
469        let whole = caps.get(0).expect("group 0 always present");
470        let raw_target = caps.get(1).map(|m| m.as_str()).unwrap_or("");
471        // Match on the SAME normalized key the read side uses, so `[[old]]`,
472        // `[[old.md]]`, and `[[./old]]` all retarget and a prefix like
473        // `[[old-jr]]` never does.
474        if normalize_target(Path::new(raw_target)) != old_target {
475            continue;
476        }
477        // Copy the gap since the previous rewrite verbatim, then the rebuilt
478        // link: canonical bare new target + the original display, if any.
479        out.push_str(&text[last..whole.start()]);
480        out.push_str("[[");
481        out.push_str(&new_target);
482        if let Some(display) = caps.get(2) {
483            out.push('|');
484            out.push_str(display.as_str());
485        }
486        out.push_str("]]");
487        last = whole.end();
488    }
489    out.push_str(&text[last..]);
490    out
491}
492
493// ── Private helpers ─────────────────────────────────────────────────────────
494
495/// The wiki-link regex used by the **write side** ([`rewrite_links_to`]):
496/// captures the target (group 1) AND the optional `|display` (group 2) so a
497/// rewrite can re-emit the display verbatim. The target/display character
498/// classes match [`wiki_link_re`] exactly, so the write side recognizes a link
499/// iff the read side does — the two never disagree on what a `[[…]]` is.
500fn rewrite_link_re() -> Regex {
501    Regex::new(r"\[\[([^\]\|\n]+?)(?:\|([^\]\n]*))?\]\]")
502        .expect("static wiki-link rewrite regex compiles")
503}
504
505/// Normalize a store-relative path into the canonical wiki-link target form:
506/// forward slashes, no leading `./` or `/`, and no trailing `.md`. This is the
507/// single key that incoming/outgoing edges are compared on, so the `.md` and
508/// bare forms of a target unify.
509fn normalize_target(path: &Path) -> String {
510    let mut s = path.to_string_lossy().replace('\\', "/");
511    while let Some(rest) = s.strip_prefix("./") {
512        s = rest.to_string();
513    }
514    let s = s.trim_start_matches('/');
515    let s = s.strip_suffix(".md").unwrap_or(s);
516    s.trim().to_string()
517}
518
519/// The wiki-link regex: `[[target]]` / `[[target|display]]`. Captures the raw
520/// target (group 1). Compiled once per call site; cheap.
521fn wiki_link_re() -> Regex {
522    // target = anything up to the first `|` or `]`. display (optional) is
523    // discarded. Matches across a single line/body slice.
524    Regex::new(r"\[\[([^\]\|\n]+?)(?:\|[^\]\n]*)?\]\]").expect("static wiki-link regex compiles")
525}
526
527/// Extract every wiki-link target from a body, normalized to the canonical
528/// store-relative form. Order-preserving; duplicates kept (callers dedup).
529fn extract_link_targets(body: &str) -> Vec<String> {
530    let re = wiki_link_re();
531    re.captures_iter(body)
532        .filter_map(|c| c.get(1))
533        .map(|m| normalize_target(Path::new(m.as_str().trim())))
534        .filter(|t| !t.is_empty())
535        .collect()
536}
537
538/// Resolve the store root + a store-relative path to the absolute on-disk file,
539/// trying the path as written and then with a `.md` extension. `None` if
540/// neither exists.
541fn resolve_existing(store: &Store, store_relative: &Path) -> Option<PathBuf> {
542    let direct = store.root.join(store_relative);
543    if direct.is_file() {
544        return Some(direct);
545    }
546    let normalized = normalize_target(store_relative);
547    let with_md = store.root.join(format!("{normalized}.md"));
548    if with_md.is_file() {
549        return Some(with_md);
550    }
551    None
552}
553
554/// Convert an absolute path under the store root into its store-relative form.
555fn rel_path(store: &Store, abs: &Path) -> Option<PathBuf> {
556    abs.strip_prefix(&store.root).ok().map(|p| p.to_path_buf())
557}
558
559/// Which layer a store-relative path sits in, by its first component.
560fn path_layer(rel: &Path) -> Option<Layer> {
561    let first = rel.components().next()?;
562    match first.as_os_str().to_str()? {
563        "sources" => Some(Layer::Sources),
564        "records" => Some(Layer::Records),
565        "wiki" => Some(Layer::Wiki),
566        _ => None,
567    }
568}
569
570/// True if a store-relative path is a *content* file: under `sources/`,
571/// `records/`, or `wiki/`, a `.md` file, and not an `index.md`. Meta files
572/// (`DB.md`, `log.md`, `log/…`, sidecars) are excluded.
573fn is_content_rel(rel: &Path) -> bool {
574    if path_layer(rel).is_none() {
575        return false;
576    }
577    match rel.extension().and_then(|e| e.to_str()) {
578        Some("md") => {}
579        _ => return false,
580    }
581    rel.file_name().and_then(|n| n.to_str()) != Some("index.md")
582}
583
584/// Walk every content `.md` file in the store via the **`ignore`** walker
585/// (the ripgrep directory engine). Only the three layer roots
586/// (`sources/`/`records/`/`wiki/`) are descended, so `DB.md`, `log.md`, and
587/// `log/` at the store root are structurally never reached; hidden dirs and
588/// per-folder `index.md` sidecars are filtered out ([`is_content_rel`]). Honors
589/// `.gitignore` the way `rg` does. Returns absolute paths. SWEEP-class.
590fn walk_content_files(store: &Store) -> Result<Vec<PathBuf>, StoreError> {
591    let mut out = Vec::new();
592    for layer in Layer::all() {
593        let dir = store.root.join(layer_dir_name(layer));
594        if !dir.is_dir() {
595            continue;
596        }
597        let walker = WalkBuilder::new(&dir)
598            .hidden(true)
599            .git_ignore(true)
600            .git_global(false)
601            .require_git(false)
602            .build();
603        for result in walker {
604            let entry = result.map_err(|e| StoreError::Search {
605                root: store.root.clone(),
606                message: format!("walk failed: {e}"),
607            })?;
608            if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
609                continue;
610            }
611            let abs = entry.into_path();
612            if let Some(rel) = rel_path(store, &abs) {
613                if is_content_rel(&rel) {
614                    out.push(abs);
615                }
616            }
617        }
618    }
619    Ok(out)
620}
621
622/// The on-disk folder name for a layer. Mirrors `Layer::dir_name`; kept local
623/// so the graph module owns its own copy rather than coupling to that body.
624fn layer_dir_name(layer: Layer) -> &'static str {
625    match layer {
626        Layer::Sources => "sources",
627        Layer::Records => "records",
628        Layer::Wiki => "wiki",
629    }
630}
631
632/// Read a reached node's `summary` and `type` from its frontmatter. A missing
633/// file, missing frontmatter, or unparseable YAML degrades to an empty summary
634/// / unknown type rather than failing the whole hydration — `neighborhood` is
635/// best-effort context assembly, not validation.
636fn read_summary_and_type(store: &Store, rel: &Path) -> (String, Option<String>) {
637    let abs = match resolve_existing(store, rel) {
638        Some(a) => a,
639        None => return (String::new(), None),
640    };
641    let text = match std::fs::read_to_string(&abs) {
642        Ok(t) => t,
643        Err(_) => return (String::new(), None),
644    };
645    let yaml = match frontmatter_block(&text) {
646        Some(y) => y,
647        None => return (String::new(), None),
648    };
649    let value: serde_yml::Value = match serde_yml::from_str(yaml) {
650        Ok(v) => v,
651        Err(_) => return (String::new(), None),
652    };
653    let summary = value
654        .get("summary")
655        .and_then(|v| v.as_str())
656        .unwrap_or("")
657        .to_string();
658    let type_ = value
659        .get("type")
660        .and_then(|v| v.as_str())
661        .map(|s| s.to_string());
662    (summary, type_)
663}
664
665/// Return the YAML between the opening and closing `---` fences (exclusive), or
666/// `None` if the text has no leading frontmatter block. Local mirror of the
667/// parser's split so the graph module stays self-contained.
668fn frontmatter_block(text: &str) -> Option<&str> {
669    let rest = text
670        .strip_prefix("---\n")
671        .or_else(|| text.strip_prefix("---\r\n"))?;
672    // Find the closing fence: a line that is exactly `---`.
673    let mut idx = 0usize;
674    for line in rest.split_inclusive('\n') {
675        let trimmed = line.trim_end_matches(['\r', '\n']);
676        if trimmed == "---" {
677            return Some(&rest[..idx]);
678        }
679        idx += line.len();
680    }
681    None
682}
683
684#[cfg(test)]
685mod tests {
686    use super::*;
687    use std::fs;
688    use tempfile::TempDir;
689
690    use crate::parser::Config;
691
692    // ── Fixture builder ─────────────────────────────────────────────────────
693    //
694    // A real on-disk store in a tempdir. We write actual files (frontmatter +
695    // wiki-links) and exercise the real code paths. The fixture constructs the
696    // `Store` by its public fields rather than `Store::open`, so the graph
697    // tests stand on their own and do not depend on any other module's
698    // behavior. Each test asserts the behavior the SPEC promises, derived from
699    // intent, never from echoing the function's own output.
700    //
701    // `backlinks` (and `neighborhood` in any incoming direction) enumerate their
702    // candidate set from the type-folder `index.jsonl` sidecars — the loop
703    // contract: never a whole-store content walk. A real db.md store maintains
704    // those sidecars write-through, so a test that exercises backlinks must call
705    // [`Fixture::reindex`] after writing its files to build them (the SWEEP that
706    // `dbmd index rebuild` runs). Forwardlinks/orphans read content directly and
707    // need no sidecar.
708
709    struct Fixture {
710        _tmp: TempDir,
711        store: Store,
712    }
713
714    impl Fixture {
715        fn new() -> Self {
716            let tmp = TempDir::new().expect("tempdir");
717            let root = tmp.path().to_path_buf();
718            fs::write(root.join("DB.md"), "---\ntype: db-md\n---\n# store\n").expect("DB.md");
719            let store = Store {
720                root,
721                config: Config::default(),
722            };
723            Fixture { _tmp: tmp, store }
724        }
725
726        /// Write a content file at a store-relative path with the given type,
727        /// summary, and body. Creates parent dirs.
728        fn write(&self, rel: &str, type_: &str, summary: &str, body: &str) {
729            let abs = self.store.root.join(rel);
730            fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
731            let contents = format!(
732                "---\ntype: {type_}\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: {summary}\n---\n{body}\n"
733            );
734            fs::write(&abs, contents).expect("write file");
735        }
736
737        /// Write a raw file verbatim (for frontmatter-shape edge cases).
738        fn write_raw(&self, rel: &str, contents: &str) {
739            let abs = self.store.root.join(rel);
740            fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
741            fs::write(&abs, contents).expect("write raw");
742        }
743
744        /// Build the type-folder `index.jsonl` sidecars from the content written
745        /// so far — the state a real store is always in (write-through), and the
746        /// candidate set `backlinks` reads. Call after writing files in any test
747        /// that exercises `backlinks` or an incoming-direction `neighborhood`.
748        fn reindex(&self) {
749            crate::index::Index::rebuild_all(&self.store).expect("rebuild sidecars");
750        }
751
752        fn p(&self, rel: &str) -> PathBuf {
753            PathBuf::from(rel)
754        }
755    }
756
757    fn paths(v: &[PathBuf]) -> Vec<String> {
758        v.iter()
759            .map(|p| p.to_string_lossy().replace('\\', "/"))
760            .collect()
761    }
762
763    // ── normalize_target ────────────────────────────────────────────────────
764
765    #[test]
766    fn normalize_strips_md_and_leading_dotslash() {
767        assert_eq!(
768            normalize_target(Path::new("records/contacts/sarah.md")),
769            "records/contacts/sarah"
770        );
771        assert_eq!(
772            normalize_target(Path::new("./wiki/people/elena")),
773            "wiki/people/elena"
774        );
775        assert_eq!(normalize_target(Path::new("/records/x")), "records/x");
776        // Bare and `.md` forms must collapse to the same key, or edges won't unify.
777        assert_eq!(
778            normalize_target(Path::new("a/b")),
779            normalize_target(Path::new("a/b.md"))
780        );
781    }
782
783    // ── extract_link_targets (forwardlinks core) ────────────────────────────
784
785    #[test]
786    fn extract_handles_display_text_and_md_suffix() {
787        let body = "See [[wiki/people/sarah-chen|Sarah]] and [[records/contacts/elena.md]].";
788        let got = extract_link_targets(body);
789        assert_eq!(
790            got,
791            vec!["wiki/people/sarah-chen", "records/contacts/elena"]
792        );
793    }
794
795    #[test]
796    fn extract_ignores_external_markdown_links() {
797        // Standard markdown links are NOT wiki-links and must not be extracted
798        // (SPEC: external refs don't participate in the graph).
799        let body = "[Acme](https://acme.io) but [[records/companies/acme]] is internal.";
800        let got = extract_link_targets(body);
801        assert_eq!(got, vec!["records/companies/acme"]);
802    }
803
804    #[test]
805    fn extract_display_text_is_not_treated_as_a_target() {
806        // A `|display` segment that looks path-like must not become a target;
807        // only the part before `|` is the link target.
808        let body = "[[records/contacts/sarah|sources/emails/decoy]]";
809        let got = extract_link_targets(body);
810        assert_eq!(got, vec!["records/contacts/sarah"]);
811    }
812
813    // ── rewrite_links_to (write-side twin of backlinks) ─────────────────────
814
815    #[test]
816    fn rewrite_plain_link_to_canonical_new_target() {
817        let got = rewrite_links_to(
818            "See [[records/contacts/sarah-chen]] today.",
819            Path::new("records/contacts/sarah-chen"),
820            Path::new("records/contacts/sarah-chen-acme"),
821        );
822        assert_eq!(got, "See [[records/contacts/sarah-chen-acme]] today.");
823    }
824
825    #[test]
826    fn rewrite_preserves_display_override() {
827        let got = rewrite_links_to(
828            "With [[records/contacts/sarah-chen|Sarah]].",
829            Path::new("records/contacts/sarah-chen"),
830            Path::new("records/contacts/sarah-chen-acme"),
831        );
832        assert_eq!(got, "With [[records/contacts/sarah-chen-acme|Sarah]].");
833    }
834
835    #[test]
836    fn rewrite_matches_md_suffixed_old_and_emits_bare_new() {
837        // The `.md` spelling of the old target must match (it normalizes to the
838        // same key the read side uses), and the new target is emitted bare —
839        // the writer doctrine validate enforces (`WIKI_LINK_HAS_EXTENSION`).
840        let got = rewrite_links_to(
841            "[[records/contacts/sarah-chen.md]]",
842            Path::new("records/contacts/sarah-chen"),
843            Path::new("records/contacts/new.md"),
844        );
845        assert_eq!(got, "[[records/contacts/new]]");
846    }
847
848    #[test]
849    fn rewrite_leaves_prefix_collisions_and_short_form_untouched() {
850        // Boundary correctness, anchored to the SAME normalize_target the read
851        // side keys on: `records/contacts/sarah-chen` must NOT match the longer
852        // `[[…-jr]]`, the short-form `[[sarah-chen]]`, or an unrelated target.
853        let input = "[[records/contacts/sarah-chen-jr]] [[sarah-chen]] [[wiki/topics/x]]";
854        let got = rewrite_links_to(
855            input,
856            Path::new("records/contacts/sarah-chen"),
857            Path::new("records/contacts/new"),
858        );
859        assert_eq!(got, input, "no genuine edge to the seed → text unchanged");
860    }
861
862    #[test]
863    fn rewrite_handles_multiple_occurrences_and_mixed_spellings() {
864        let got = rewrite_links_to(
865            "[[records/x]] then [[./records/x]] and [[records/x.md|d]] end",
866            Path::new("records/x"),
867            Path::new("records/y"),
868        );
869        // All three spellings of the same target retarget; the display survives.
870        assert_eq!(
871            got,
872            "[[records/y]] then [[records/y]] and [[records/y|d]] end"
873        );
874    }
875
876    #[test]
877    fn rewrite_retargets_exactly_the_edges_the_core_parser_sees() {
878        // The load-bearing property of moving the rewrite into core: the write
879        // side must operate on EXACTLY the edge set the read side recognizes —
880        // the same `extract_link_targets` / `normalize_target` grammar that
881        // `forwardlinks` is built on. Anchor the test to that grammar (via
882        // `forwardlinks` on a real file) rather than re-listing literals, so a
883        // future divergence between the read parser and the write rewrite fails
884        // here. (Coupled to `forwardlinks` — the single-file edge extractor —
885        // not the multi-file `backlinks` traversal, so it tests the grammar, not
886        // the walk.)
887        let fx = Fixture::new();
888        let body = "Met [[records/contacts/sarah.md|Sarah]] and not [[records/contacts/sarah-2]].";
889        fx.write("wiki/people/bio.md", "wiki-page", "bio", body);
890
891        // Read side: the parser sees two outgoing edges, both in canonical bare
892        // form (the `.md` spelling collapsed). `sarah` is a real edge here.
893        let edges = forwardlinks(&fx.store, &fx.p("wiki/people/bio.md")).unwrap();
894        assert_eq!(
895            paths(&edges),
896            vec!["records/contacts/sarah", "records/contacts/sarah-2"],
897            "fixture must contain exactly the two edges this test reasons about"
898        );
899
900        // Write side: rewriting `sarah → sarah-chen` must retarget the edge the
901        // parser recognized (matching the `.md` spelling), preserve the display,
902        // and leave the unrelated `sarah-2` edge untouched.
903        let got = rewrite_links_to(
904            body,
905            Path::new("records/contacts/sarah"),
906            Path::new("records/contacts/sarah-chen"),
907        );
908        assert_eq!(
909            got,
910            "Met [[records/contacts/sarah-chen|Sarah]] and not [[records/contacts/sarah-2]]."
911        );
912
913        // Cross-check through the parser: the rewritten text's edge set is the
914        // original with `sarah` swapped for `sarah-chen` — proving the rewrite
915        // moved exactly one edge, the one the read side keyed on.
916        fx.write("wiki/people/bio.md", "wiki-page", "bio", &got);
917        let after = forwardlinks(&fx.store, &fx.p("wiki/people/bio.md")).unwrap();
918        assert_eq!(
919            paths(&after),
920            vec!["records/contacts/sarah-2", "records/contacts/sarah-chen"],
921            "after rewrite the parser must see the new target and not the old"
922        );
923    }
924
925    #[test]
926    fn rewrite_empty_old_target_is_a_no_op() {
927        // A degenerate `old` (normalizes to empty) must never rewrite anything,
928        // mirroring backlinks' empty-target guard.
929        let input = "[[records/x]] [[]] text";
930        let got = rewrite_links_to(input, Path::new(""), Path::new("records/y"));
931        assert_eq!(got, input);
932    }
933
934    #[test]
935    fn rewrite_no_match_returns_input_unchanged() {
936        let input = "no links, [external](https://x), and [[wiki/topics/y]]";
937        let got = rewrite_links_to(input, Path::new("records/x"), Path::new("records/z"));
938        assert_eq!(got, input);
939    }
940
941    // ── forwardlinks ─────────────────────────────────────────────────────────
942
943    #[test]
944    fn forwardlinks_returns_sorted_deduped_targets_excluding_self() {
945        let fx = Fixture::new();
946        fx.write(
947            "wiki/projects/renewal.md",
948            "wiki-page",
949            "Renewal project",
950            "Links: [[records/contacts/sarah]] [[records/companies/acme]] [[records/contacts/sarah]] and itself [[wiki/projects/renewal]].",
951        );
952        // The targets need not exist on disk for forwardlinks (it reads the one
953        // file only). Self-links are dropped; duplicates collapse; sorted asc.
954        let got = forwardlinks(&fx.store, &fx.p("wiki/projects/renewal.md")).unwrap();
955        assert_eq!(
956            paths(&got),
957            vec!["records/companies/acme", "records/contacts/sarah"]
958        );
959    }
960
961    #[test]
962    fn forwardlinks_picks_up_wiki_links_in_frontmatter() {
963        // SPEC: wiki-links appear in scalar + block-sequence frontmatter fields,
964        // not just the body. forwardlinks must follow those edges too.
965        let fx = Fixture::new();
966        fx.write_raw(
967            "records/meetings/m1.md",
968            "---\ntype: meeting\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Renewal sync\ncompany: [[records/companies/acme]]\nattendees:\n  - [[records/contacts/sarah]]\n  - [[records/contacts/elena]]\n---\nNotes about [[wiki/projects/renewal]].\n",
969        );
970        let got = forwardlinks(&fx.store, &fx.p("records/meetings/m1.md")).unwrap();
971        assert_eq!(
972            paths(&got),
973            vec![
974                "records/companies/acme",
975                "records/contacts/elena",
976                "records/contacts/sarah",
977                "wiki/projects/renewal",
978            ]
979        );
980    }
981
982    #[test]
983    fn forwardlinks_missing_file_is_empty_not_error() {
984        let fx = Fixture::new();
985        let got = forwardlinks(&fx.store, &fx.p("wiki/people/ghost.md")).unwrap();
986        assert!(got.is_empty());
987    }
988
989    #[test]
990    fn forwardlinks_resolves_seed_given_without_md_extension() {
991        let fx = Fixture::new();
992        fx.write(
993            "wiki/people/sarah.md",
994            "wiki-page",
995            "Sarah bio",
996            "Works at [[records/companies/acme]].",
997        );
998        // Seed passed in bare wiki-link form (no `.md`) must still resolve.
999        let got = forwardlinks(&fx.store, &fx.p("wiki/people/sarah")).unwrap();
1000        assert_eq!(paths(&got), vec!["records/companies/acme"]);
1001    }
1002
1003    // ── backlinks ──────────────────────────────────────────────────────────
1004
1005    #[test]
1006    fn backlinks_finds_incoming_across_layers_and_link_forms() {
1007        let fx = Fixture::new();
1008        // Target.
1009        fx.write("records/contacts/sarah.md", "contact", "Sarah Chen", "");
1010        // Three different incoming-link spellings, all to the same target.
1011        fx.write(
1012            "wiki/people/sarah.md",
1013            "wiki-page",
1014            "bio",
1015            "See [[records/contacts/sarah]].",
1016        );
1017        fx.write(
1018            "records/meetings/m1.md",
1019            "meeting",
1020            "Renewal call",
1021            "Attendee [[records/contacts/sarah|Sarah]].",
1022        );
1023        fx.write(
1024            "sources/emails/e1.md",
1025            "email",
1026            "Hi",
1027            "From [[records/contacts/sarah.md]] today.",
1028        );
1029        // A file that links to a DIFFERENT contact must not be a backlink.
1030        fx.write(
1031            "wiki/people/other.md",
1032            "wiki-page",
1033            "x",
1034            "[[records/contacts/sarah-2]]",
1035        );
1036        fx.reindex();
1037
1038        // All three link forms ([[x]], [[x|d]], [[x.md]]) resolve to the same
1039        // target and are found; the linkers are returned in canonical bare form.
1040        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1041        assert_eq!(
1042            paths(&got),
1043            vec![
1044                "records/meetings/m1",
1045                "sources/emails/e1",
1046                "wiki/people/sarah",
1047            ]
1048        );
1049    }
1050
1051    #[test]
1052    fn backlinks_and_forwardlinks_round_trip_on_same_key() {
1053        // If A forwardlinks to B, then B backlinks to A — both expressed in the
1054        // identical bare key, so neighborhood can dedup across directions.
1055        let fx = Fixture::new();
1056        fx.write(
1057            "wiki/people/a.md",
1058            "wiki-page",
1059            "A",
1060            "Knows [[wiki/people/b]].",
1061        );
1062        fx.write("wiki/people/b.md", "wiki-page", "B", "");
1063        fx.reindex();
1064        let fwd = forwardlinks(&fx.store, &fx.p("wiki/people/a.md")).unwrap();
1065        let back = backlinks(&fx.store, &fx.p("wiki/people/b.md")).unwrap();
1066        assert_eq!(paths(&fwd), vec!["wiki/people/b"]);
1067        assert_eq!(paths(&back), vec!["wiki/people/a"]);
1068    }
1069
1070    #[test]
1071    fn backlinks_does_not_match_path_prefix_collisions() {
1072        let fx = Fixture::new();
1073        fx.write("records/contacts/sam.md", "contact", "Sam", "");
1074        // `sam-smith` shares the `sam` prefix; must NOT count as a backlink to `sam`.
1075        fx.write(
1076            "wiki/people/x.md",
1077            "wiki-page",
1078            "x",
1079            "[[records/contacts/sam-smith]]",
1080        );
1081        // The genuine backlink.
1082        fx.write(
1083            "wiki/people/y.md",
1084            "wiki-page",
1085            "y",
1086            "[[records/contacts/sam]]",
1087        );
1088        fx.reindex();
1089
1090        let got = backlinks(&fx.store, &fx.p("records/contacts/sam")).unwrap();
1091        assert_eq!(paths(&got), vec!["wiki/people/y"]);
1092    }
1093
1094    #[test]
1095    fn backlinks_excludes_self_reference() {
1096        let fx = Fixture::new();
1097        // A page that links to itself is not its own backlink.
1098        fx.write(
1099            "wiki/synthesis/overview.md",
1100            "wiki-page",
1101            "Overview",
1102            "This page [[wiki/synthesis/overview]] references itself.",
1103        );
1104        fx.reindex();
1105        let got = backlinks(&fx.store, &fx.p("wiki/synthesis/overview.md")).unwrap();
1106        assert!(
1107            got.is_empty(),
1108            "self-link must not appear as a backlink, got {got:?}"
1109        );
1110    }
1111
1112    #[test]
1113    fn backlinks_empty_when_nobody_links() {
1114        let fx = Fixture::new();
1115        fx.write("records/contacts/lonely.md", "contact", "Lonely", "");
1116        fx.write(
1117            "wiki/people/unrelated.md",
1118            "wiki-page",
1119            "x",
1120            "[[records/companies/acme]]",
1121        );
1122        fx.reindex();
1123        let got = backlinks(&fx.store, &fx.p("records/contacts/lonely.md")).unwrap();
1124        assert!(got.is_empty());
1125    }
1126
1127    #[test]
1128    fn backlinks_ignores_index_and_meta_files() {
1129        let fx = Fixture::new();
1130        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1131        // An index.md that lists the target must NOT be reported as a backlink
1132        // (indexes are catalog, not relationship edges).
1133        fx.write_raw(
1134            "records/contacts/index.md",
1135            "---\ntype: index\nscope: folder\nfolder: records/contacts\n---\n- [[records/contacts/sarah]] — Sarah\n",
1136        );
1137        fx.reindex();
1138        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1139        assert!(got.is_empty(), "index.md must be excluded, got {got:?}");
1140    }
1141
1142    #[test]
1143    fn backlinks_finds_body_only_edge_not_in_frontmatter_links_field() {
1144        // REGRESSION: the sidecar's `links` field carries only the file's
1145        // frontmatter `links:` list; it does NOT include wiki-links written in
1146        // the body or in other typed frontmatter fields. Answering backlinks
1147        // from `links[]` alone would silently miss this edge. The candidate set
1148        // is sidecar-bounded, but each candidate's edge is confirmed by parsing
1149        // the file (the same extraction forwardlinks uses), so a body-only link
1150        // must still register as a backlink.
1151        let fx = Fixture::new();
1152        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1153        // `meeting.md` links to sarah ONLY in its body — its frontmatter has no
1154        // `links:` field at all, so the sidecar record's `links` is empty.
1155        fx.write(
1156            "records/meetings/standup.md",
1157            "meeting",
1158            "Standup",
1159            "Discussed renewal with [[records/contacts/sarah]].",
1160        );
1161        fx.reindex();
1162
1163        // Guard the premise: the sidecar record really does carry an empty
1164        // `links` (so this test fails loudly if the index ever starts extracting
1165        // body links — at which point the backlink predicate could be revisited).
1166        let rec = fx
1167            .store
1168            .find_by_type("meeting")
1169            .unwrap()
1170            .into_iter()
1171            .find(|r| r.path == fx.p("records/meetings/standup.md"))
1172            .expect("meeting is catalogued in its sidecar");
1173        assert!(
1174            rec.links.is_empty(),
1175            "premise: the body link is NOT projected into the sidecar `links` field; got {:?}",
1176            rec.links
1177        );
1178
1179        // Yet backlinks still finds it — because it confirms via the file parse,
1180        // not via the sidecar `links` field.
1181        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1182        assert_eq!(
1183            paths(&got),
1184            vec!["records/meetings/standup"],
1185            "a body-only wiki-link must register as a backlink"
1186        );
1187    }
1188
1189    #[test]
1190    fn backlinks_finds_edge_in_typed_frontmatter_field() {
1191        // A wiki-link inside a *typed* frontmatter field (`company:`) is a real
1192        // edge forwardlinks follows, so backlinks must find it too — even though
1193        // the sidecar's `links` field (the `links:` key only) does not list it.
1194        let fx = Fixture::new();
1195        fx.write("records/companies/acme.md", "company", "Acme", "");
1196        fx.write_raw(
1197            "records/contacts/sarah.md",
1198            "---\ntype: contact\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Sarah\ncompany: [[records/companies/acme]]\n---\nBody with no links.\n",
1199        );
1200        fx.reindex();
1201        let got = backlinks(&fx.store, &fx.p("records/companies/acme.md")).unwrap();
1202        assert_eq!(
1203            paths(&got),
1204            vec!["records/contacts/sarah"],
1205            "a wiki-link in a typed frontmatter field is an incoming edge"
1206        );
1207    }
1208
1209    #[test]
1210    fn backlinks_unscoped_scans_the_tree_not_only_the_sidecar() {
1211        // REGRESSION (loop budget): an UNSCOPED `backlinks` must resolve incoming
1212        // edges with a SINGLE embedded-ripgrep pass over the tree
1213        // (`Store::find_links_to`), NOT by reading the sidecar candidate set and
1214        // then `read_to_string`-confirming each candidate (which re-opens every
1215        // content file → O(store); the documented >3x budget miss). A ripgrep
1216        // pass is the same scan engine `validate`/`rename`/`dbmd links` ride, and
1217        // the tree — not the sidecar — is its ground truth: a linker that is on
1218        // disk but absent from every sidecar (stale / never-built index) is still
1219        // found. We assert that behaviorally, which fails loudly if the unscoped
1220        // path ever reverts to the sidecar-bounded per-candidate confirm loop
1221        // (that loop would NOT find the unindexed linker).
1222        let fx = Fixture::new();
1223        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1224        fx.write(
1225            "wiki/people/indexed.md",
1226            "wiki-page",
1227            "Indexed",
1228            "[[records/contacts/sarah]]",
1229        );
1230        fx.reindex(); // builds sidecars for sarah + the indexed linker
1231
1232        // Now drop a NEW linker on disk WITHOUT reindexing — it is on disk but in
1233        // no sidecar.
1234        fx.write(
1235            "wiki/people/unindexed.md",
1236            "wiki-page",
1237            "Unindexed",
1238            "[[records/contacts/sarah]]",
1239        );
1240
1241        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1242        assert_eq!(
1243            paths(&got),
1244            vec!["wiki/people/indexed", "wiki/people/unindexed"],
1245            "unscoped backlinks ripgrep-scans the tree, so the on-disk-but-unindexed \
1246             linker is found too — not only the sidecar-catalogued one"
1247        );
1248    }
1249
1250    #[test]
1251    fn backlinks_scoped_candidates_come_from_the_sidecar_not_a_tree_walk() {
1252        // REGRESSION (scale contract): the SCOPED form (`--type` / `--in`) is the
1253        // I/O-scoped path — it enumerates candidates from the relevant type-folder
1254        // `index.jsonl` sidecars and parses only those, NOT a whole-tree walk.
1255        // That is what makes the scope an I/O scope, not just a result filter:
1256        // a linker that is on disk but ABSENT from the sidecar (stale / never-built
1257        // index) is NOT discovered by the scoped call (the sidecar bounds which
1258        // files are candidates). This is the loop-vs-walk distinction the SPEC
1259        // draws, and it is exactly the inverse of the unscoped tree scan above.
1260        let fx = Fixture::new();
1261        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1262        fx.write(
1263            "wiki/people/indexed.md",
1264            "wiki-page",
1265            "Indexed",
1266            "[[records/contacts/sarah]]",
1267        );
1268        fx.reindex(); // builds sidecars for sarah + the indexed linker
1269
1270        // Drop a NEW wiki-page linker on disk WITHOUT reindexing — on disk, in no
1271        // sidecar.
1272        fx.write(
1273            "wiki/people/unindexed.md",
1274            "wiki-page",
1275            "Unindexed",
1276            "[[records/contacts/sarah]]",
1277        );
1278
1279        // Scoped to the `wiki-page` type: the candidate set is the sidecar's, so
1280        // only the catalogued linker is found — the unindexed one is invisible.
1281        let only_wiki_pages = vec!["wiki-page".to_string()];
1282        let got = backlinks_filtered(
1283            &fx.store,
1284            &fx.p("records/contacts/sarah.md"),
1285            &only_wiki_pages,
1286            None,
1287        )
1288        .unwrap();
1289        assert_eq!(
1290            paths(&got),
1291            vec!["wiki/people/indexed"],
1292            "scoped backlinks reads the sidecar candidate set; the on-disk-but-unindexed \
1293             linker is not tree-walked"
1294        );
1295    }
1296
1297    #[test]
1298    fn backlinks_filtered_type_scopes_the_candidate_set() {
1299        // `--type` narrows backlinks to linkers of that type. Two files link to
1300        // the target — one `meeting`, one `wiki-page`; filtering to `meeting`
1301        // returns only the meeting.
1302        let fx = Fixture::new();
1303        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1304        fx.write(
1305            "records/meetings/m1.md",
1306            "meeting",
1307            "Call",
1308            "[[records/contacts/sarah]]",
1309        );
1310        fx.write(
1311            "wiki/people/bio.md",
1312            "wiki-page",
1313            "Bio",
1314            "[[records/contacts/sarah]]",
1315        );
1316        fx.reindex();
1317
1318        let only_meetings = vec!["meeting".to_string()];
1319        let got = backlinks_filtered(
1320            &fx.store,
1321            &fx.p("records/contacts/sarah.md"),
1322            &only_meetings,
1323            None,
1324        )
1325        .unwrap();
1326        assert_eq!(
1327            paths(&got),
1328            vec!["records/meetings/m1"],
1329            "--type meeting must exclude the wiki-page linker"
1330        );
1331
1332        // Unfiltered, both come back — proving the filter (not the data) dropped one.
1333        let all = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1334        assert_eq!(paths(&all), vec!["records/meetings/m1", "wiki/people/bio"]);
1335    }
1336
1337    #[test]
1338    fn backlinks_filtered_layer_scopes_the_candidate_set() {
1339        // `--in <layer>` narrows backlinks to linkers under that layer.
1340        let fx = Fixture::new();
1341        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1342        fx.write(
1343            "records/meetings/m1.md",
1344            "meeting",
1345            "Call",
1346            "[[records/contacts/sarah]]",
1347        );
1348        fx.write(
1349            "wiki/people/bio.md",
1350            "wiki-page",
1351            "Bio",
1352            "[[records/contacts/sarah]]",
1353        );
1354        fx.reindex();
1355
1356        let got = backlinks_filtered(
1357            &fx.store,
1358            &fx.p("records/contacts/sarah.md"),
1359            &[],
1360            Some(Layer::Wiki),
1361        )
1362        .unwrap();
1363        assert_eq!(
1364            paths(&got),
1365            vec!["wiki/people/bio"],
1366            "--in wiki must keep only the wiki-layer linker"
1367        );
1368
1369        let records_only = backlinks_filtered(
1370            &fx.store,
1371            &fx.p("records/contacts/sarah.md"),
1372            &[],
1373            Some(Layer::Records),
1374        )
1375        .unwrap();
1376        assert_eq!(paths(&records_only), vec!["records/meetings/m1"]);
1377    }
1378
1379    // ── neighborhood ─────────────────────────────────────────────────────────
1380
1381    #[test]
1382    fn neighborhood_hops_zero_is_empty() {
1383        let fx = Fixture::new();
1384        fx.write("wiki/people/a.md", "wiki-page", "A", "[[wiki/people/b]]");
1385        fx.write("wiki/people/b.md", "wiki-page", "B", "");
1386        let slice = neighborhood(
1387            &fx.store,
1388            &fx.p("wiki/people/a.md"),
1389            0,
1390            &[],
1391            Direction::Both,
1392        )
1393        .unwrap();
1394        assert_eq!(slice.seed, fx.p("wiki/people/a"));
1395        assert!(slice.nodes.is_empty());
1396    }
1397
1398    #[test]
1399    fn neighborhood_outgoing_one_hop_reads_summary_and_type() {
1400        let fx = Fixture::new();
1401        fx.write(
1402            "wiki/people/a.md",
1403            "wiki-page",
1404            "Person A",
1405            "Knows [[records/contacts/b]].",
1406        );
1407        fx.write("records/contacts/b.md", "contact", "Contact B summary", "");
1408        let slice = neighborhood(
1409            &fx.store,
1410            &fx.p("wiki/people/a.md"),
1411            1,
1412            &[],
1413            Direction::Outgoing,
1414        )
1415        .unwrap();
1416        assert_eq!(slice.nodes.len(), 1);
1417        let n = &slice.nodes[0];
1418        assert_eq!(n.path, fx.p("records/contacts/b"));
1419        assert_eq!(n.summary, "Contact B summary");
1420        assert_eq!(n.type_.as_deref(), Some("contact"));
1421        assert_eq!(n.hops, 1);
1422        assert_eq!(n.via, Some((fx.p("wiki/people/a"), Direction::Outgoing)));
1423    }
1424
1425    #[test]
1426    fn neighborhood_incoming_only_walks_backlinks() {
1427        let fx = Fixture::new();
1428        // a -> seed (incoming to seed). seed -> c (outgoing from seed).
1429        fx.write(
1430            "wiki/people/seed.md",
1431            "wiki-page",
1432            "Seed",
1433            "Out to [[wiki/people/c]].",
1434        );
1435        fx.write(
1436            "wiki/people/a.md",
1437            "wiki-page",
1438            "A",
1439            "In to [[wiki/people/seed]].",
1440        );
1441        fx.write("wiki/people/c.md", "wiki-page", "C", "");
1442        fx.reindex();
1443        let slice = neighborhood(
1444            &fx.store,
1445            &fx.p("wiki/people/seed.md"),
1446            1,
1447            &[],
1448            Direction::Incoming,
1449        )
1450        .unwrap();
1451        // Incoming direction: only `a` (which links TO seed), not `c`.
1452        assert_eq!(
1453            paths(
1454                &slice
1455                    .nodes
1456                    .iter()
1457                    .map(|n| n.path.clone())
1458                    .collect::<Vec<_>>()
1459            ),
1460            vec!["wiki/people/a"]
1461        );
1462        assert_eq!(
1463            slice.nodes[0].via,
1464            Some((fx.p("wiki/people/seed"), Direction::Incoming))
1465        );
1466    }
1467
1468    #[test]
1469    fn neighborhood_bounded_bfs_respects_hop_limit_and_min_distance() {
1470        let fx = Fixture::new();
1471        // Chain a -> b -> c -> d, all outgoing.
1472        fx.write("wiki/c/a.md", "wiki-page", "A", "[[wiki/c/b]]");
1473        fx.write("wiki/c/b.md", "wiki-page", "B", "[[wiki/c/c]]");
1474        fx.write("wiki/c/c.md", "wiki-page", "C", "[[wiki/c/d]]");
1475        fx.write("wiki/c/d.md", "wiki-page", "D", "");
1476        let slice =
1477            neighborhood(&fx.store, &fx.p("wiki/c/a.md"), 2, &[], Direction::Outgoing).unwrap();
1478        // 2 hops reaches b (1) and c (2), not d (3).
1479        let by_path: HashMap<String, u32> = slice
1480            .nodes
1481            .iter()
1482            .map(|n| (n.path.to_string_lossy().to_string(), n.hops))
1483            .collect();
1484        assert_eq!(by_path.get("wiki/c/b").copied(), Some(1));
1485        assert_eq!(by_path.get("wiki/c/c").copied(), Some(2));
1486        assert_eq!(by_path.get("wiki/c/d"), None);
1487        assert_eq!(slice.nodes.len(), 2);
1488    }
1489
1490    #[test]
1491    fn neighborhood_records_min_hops_on_diamond() {
1492        let fx = Fixture::new();
1493        // Diamond: a -> b, a -> c, b -> d, c -> d. d is reachable at hop 2 from
1494        // either branch; it must be recorded once, at hop 2.
1495        fx.write("wiki/d/a.md", "wiki-page", "A", "[[wiki/d/b]] [[wiki/d/c]]");
1496        fx.write("wiki/d/b.md", "wiki-page", "B", "[[wiki/d/d]]");
1497        fx.write("wiki/d/c.md", "wiki-page", "C", "[[wiki/d/d]]");
1498        fx.write("wiki/d/d.md", "wiki-page", "D", "");
1499        let slice =
1500            neighborhood(&fx.store, &fx.p("wiki/d/a.md"), 3, &[], Direction::Outgoing).unwrap();
1501        let d_nodes: Vec<&ContextNode> = slice
1502            .nodes
1503            .iter()
1504            .filter(|n| n.path == fx.p("wiki/d/d"))
1505            .collect();
1506        assert_eq!(d_nodes.len(), 1, "d must appear exactly once");
1507        assert_eq!(d_nodes[0].hops, 2, "d's min distance from a is 2");
1508        // b and c at hop 1, d at hop 2 => 3 nodes total, no cycle blowup.
1509        assert_eq!(slice.nodes.len(), 3);
1510    }
1511
1512    #[test]
1513    fn neighborhood_type_filter_narrows_results_but_not_traversal() {
1514        let fx = Fixture::new();
1515        // seed -> contact -> meeting. Filtering to `meeting` must still reach
1516        // the meeting THROUGH the (excluded) contact at hop 2.
1517        fx.write(
1518            "wiki/people/seed.md",
1519            "wiki-page",
1520            "Seed",
1521            "[[records/contacts/sarah]]",
1522        );
1523        fx.write(
1524            "records/contacts/sarah.md",
1525            "contact",
1526            "Sarah",
1527            "[[records/meetings/m1]]",
1528        );
1529        fx.write("records/meetings/m1.md", "meeting", "Renewal call", "");
1530        let only_meetings = vec!["meeting".to_string()];
1531        let slice = neighborhood(
1532            &fx.store,
1533            &fx.p("wiki/people/seed.md"),
1534            2,
1535            &only_meetings,
1536            Direction::Outgoing,
1537        )
1538        .unwrap();
1539        // Only the meeting is returned; the contact is traversed but filtered out.
1540        assert_eq!(slice.nodes.len(), 1);
1541        assert_eq!(slice.nodes[0].path, fx.p("records/meetings/m1"));
1542        assert_eq!(slice.nodes[0].type_.as_deref(), Some("meeting"));
1543        assert_eq!(slice.nodes[0].hops, 2);
1544    }
1545
1546    #[test]
1547    fn neighborhood_cycle_terminates() {
1548        let fx = Fixture::new();
1549        // a <-> b cycle. Must not loop forever; each appears once.
1550        fx.write("wiki/g/a.md", "wiki-page", "A", "[[wiki/g/b]]");
1551        fx.write("wiki/g/b.md", "wiki-page", "B", "[[wiki/g/a]]");
1552        fx.reindex();
1553        let slice =
1554            neighborhood(&fx.store, &fx.p("wiki/g/a.md"), 10, &[], Direction::Both).unwrap();
1555        // From a: b is the only other node (a is the seed, excluded).
1556        assert_eq!(
1557            paths(
1558                &slice
1559                    .nodes
1560                    .iter()
1561                    .map(|n| n.path.clone())
1562                    .collect::<Vec<_>>()
1563            ),
1564            vec!["wiki/g/b"]
1565        );
1566    }
1567
1568    // ── orphans ──────────────────────────────────────────────────────────────
1569
1570    #[test]
1571    fn orphans_finds_files_with_no_edges_either_direction() {
1572        let fx = Fixture::new();
1573        // Wired pair: a links to b (a has outgoing, b has incoming).
1574        fx.write("wiki/people/a.md", "wiki-page", "A", "[[wiki/people/b]]");
1575        fx.write("wiki/people/b.md", "wiki-page", "B", "");
1576        // Orphan: no links in or out.
1577        fx.write(
1578            "sources/emails/lonely.md",
1579            "email",
1580            "Lonely email",
1581            "Just text, no links.",
1582        );
1583        let got = orphans(&fx.store, None).unwrap();
1584        assert_eq!(paths(&got), vec!["sources/emails/lonely.md"]);
1585    }
1586
1587    #[test]
1588    fn orphans_file_with_only_outgoing_is_not_orphan() {
1589        let fx = Fixture::new();
1590        // `a` links out to a target that does NOT exist on disk. `a` still has
1591        // an outgoing edge, so it is not an orphan (orphan = no edges at all).
1592        fx.write(
1593            "wiki/people/a.md",
1594            "wiki-page",
1595            "A",
1596            "[[records/contacts/ghost]]",
1597        );
1598        let got = orphans(&fx.store, None).unwrap();
1599        assert!(
1600            !paths(&got).contains(&"wiki/people/a.md".to_string()),
1601            "outgoing-only is not an orphan: {got:?}"
1602        );
1603    }
1604
1605    #[test]
1606    fn orphans_file_with_only_incoming_is_not_orphan() {
1607        let fx = Fixture::new();
1608        // `target` has no outgoing links but IS linked to by `linker` — not an orphan.
1609        fx.write("records/contacts/target.md", "contact", "Target", "");
1610        fx.write(
1611            "wiki/people/linker.md",
1612            "wiki-page",
1613            "Linker",
1614            "[[records/contacts/target]]",
1615        );
1616        let got = orphans(&fx.store, None).unwrap();
1617        assert!(
1618            !paths(&got).contains(&"records/contacts/target.md".to_string()),
1619            "incoming-only is not an orphan: {got:?}"
1620        );
1621        // `linker` has outgoing, so also not an orphan.
1622        assert!(!paths(&got).contains(&"wiki/people/linker.md".to_string()));
1623    }
1624
1625    #[test]
1626    fn orphans_incoming_link_from_other_layer_unorphans() {
1627        let fx = Fixture::new();
1628        // Candidate in records/, only incoming edge comes from wiki/ — a
1629        // cross-layer link must still un-orphan it even when scoped to records.
1630        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1631        fx.write(
1632            "wiki/people/sarah.md",
1633            "wiki-page",
1634            "bio",
1635            "[[records/contacts/sarah]]",
1636        );
1637        // A genuine orphan in records/ to prove the scope still returns something.
1638        fx.write("records/contacts/nemo.md", "contact", "Nemo", "");
1639        let got = orphans(&fx.store, Some(Layer::Records)).unwrap();
1640        assert_eq!(paths(&got), vec!["records/contacts/nemo.md"]);
1641    }
1642
1643    #[test]
1644    fn orphans_layer_scope_filters_candidates() {
1645        let fx = Fixture::new();
1646        // One orphan per layer.
1647        fx.write("sources/emails/s.md", "email", "S", "no links");
1648        fx.write("records/contacts/r.md", "contact", "R", "");
1649        fx.write("wiki/people/w.md", "wiki-page", "W", "");
1650        let only_wiki = orphans(&fx.store, Some(Layer::Wiki)).unwrap();
1651        assert_eq!(paths(&only_wiki), vec!["wiki/people/w.md"]);
1652        let only_sources = orphans(&fx.store, Some(Layer::Sources)).unwrap();
1653        assert_eq!(paths(&only_sources), vec!["sources/emails/s.md"]);
1654        // No scope: all three, sorted (records, sources, wiki).
1655        let all = orphans(&fx.store, None).unwrap();
1656        assert_eq!(
1657            paths(&all),
1658            vec![
1659                "records/contacts/r.md",
1660                "sources/emails/s.md",
1661                "wiki/people/w.md",
1662            ]
1663        );
1664    }
1665
1666    #[test]
1667    fn orphans_self_link_does_not_count_as_an_edge() {
1668        let fx = Fixture::new();
1669        // A page that only links to itself has no real edges => still an orphan.
1670        fx.write(
1671            "wiki/synthesis/solo.md",
1672            "wiki-page",
1673            "Solo",
1674            "I reference [[wiki/synthesis/solo]] only.",
1675        );
1676        let got = orphans(&fx.store, None).unwrap();
1677        assert_eq!(paths(&got), vec!["wiki/synthesis/solo.md"]);
1678    }
1679
1680    #[test]
1681    fn orphans_excludes_index_and_db_files() {
1682        let fx = Fixture::new();
1683        // A lone index.md / DB.md must never be reported as an orphan content file.
1684        fx.write_raw(
1685            "wiki/index.md",
1686            "---\ntype: index\nscope: layer\nfolder: wiki\n---\n# wiki\n",
1687        );
1688        fx.write(
1689            "wiki/people/real-orphan.md",
1690            "wiki-page",
1691            "Real",
1692            "no links",
1693        );
1694        let got = orphans(&fx.store, None).unwrap();
1695        assert_eq!(paths(&got), vec!["wiki/people/real-orphan.md"]);
1696    }
1697
1698    // ── frontmatter_block helper ─────────────────────────────────────────────
1699
1700    #[test]
1701    fn frontmatter_block_extracts_between_fences() {
1702        let text = "---\ntype: contact\nsummary: hi\n---\nbody here\n";
1703        assert_eq!(
1704            frontmatter_block(text),
1705            Some("type: contact\nsummary: hi\n")
1706        );
1707    }
1708
1709    #[test]
1710    fn frontmatter_block_none_without_leading_fence() {
1711        let text = "no frontmatter here\n";
1712        assert_eq!(frontmatter_block(text), None);
1713    }
1714}