Skip to main content

dbmd_core/
graph.rs

1//! `graph` — the wiki-link **relationship layer**.
2//!
3//! Wiki-links are curated-relevance edges (the LLM wrote them), so the graph's
4//! job is to **assemble the relevant context around a seed**, not to be
5//! analyzed. **All ops are on-demand — there is no maintained graph** (a
6//! persistent graph is the roadmap engine).
7//!
8//! [`backlinks`] / [`forwardlinks`] are loop ops (O(changed), never O(store)).
9//! [`neighborhood`] is the high-value context-hydration op. [`orphans`] is a
10//! SWEEP curation worklist.
11//!
12//! Whole-graph analytics (connected components, cycle detection, shortest
13//! path, sinks/sources, DOT/JSON export) are deliberately **not** here — a
14//! human studying the graph opens the store in Obsidian; broken-link detection
15//! is [`crate::validate`]'s job (`WIKI_LINK_BROKEN`).
16//!
17//! ## Implementation note — two paths for the incoming-edge scan
18//!
19//! The scale contract (SPEC § Tooling, plan: *"the interactive loop is
20//! O(changed), never O(store)"*) is the load-bearing rule here. [`backlinks`]
21//! is a loop op, so it must **not** open and `read_to_string` every content file
22//! in the store on each call. It resolves incoming edges by one of two paths,
23//! chosen by whether the call is scoped:
24//!
25//! - **Unscoped** (`dbmd graph backlinks <x>`, no `--type`/`--in`): one
26//!   embedded-ripgrep pass for the literal `[[<target>]]` over the tree, via
27//!   [`Store::find_links_to`] (`grep` + `ignore`, early-exit per file) — the
28//!   same scan engine [`crate::validate`]'s working-set incoming-linker step
29//!   uses. A single store traversal with cheap presence-only matching, not N
30//!   whole-file parses; that is what keeps the unscoped call inside the loop
31//!   budget. [`backlinks`] then filters the raw hits to content files and emits
32//!   canonical bare targets (its relationship view), where the lower-level
33//!   [`Store::find_links_to`] returns every `.md` the text appears in.
34//! - **Scoped** (`--type` / `--in`): the candidate set is enumerated from the
35//!   relevant type-folder `index.jsonl` sidecars — one sequential read per
36//!   type-folder (via [`crate::query::Query`], which sits on
37//!   [`Store::read_type_index`]) — and each candidate is confirmed by a
38//!   single-file parse. That is what makes `--type` / `--in` an *I/O* scope, not
39//!   just a result filter: a typed/layer-scoped `backlinks` reads only the
40//!   relevant folder(s)' sidecars and parses only those files.
41//!
42//! **Why the scoped path confirms by parsing the candidate, not by trusting the
43//! sidecar's `links` field.** A sidecar record's `links` is the file's
44//! *frontmatter* `links:` list only — it does **not** capture wiki-links written
45//! in the body or inside other typed frontmatter fields (`company: [[…]]`,
46//! `attendees: [ … ]`, `derived_from: [ … ]`). [`forwardlinks`] extracts edges
47//! from the whole file, so to keep the two directions on the **same** edge set
48//! (an incoming edge to X is exactly: some file whose [`forwardlinks`] contains
49//! X) the incoming-edge confirmation re-parses each candidate file the same way.
50//! The sidecar bounds *which* files are candidates; the parse decides whether
51//! each truly links. The unscoped ripgrep path stays on that same edge set by
52//! matching the link text wherever it lives in the file (frontmatter or body).
53//! A node's `summary` / `type` likewise read frontmatter directly (the source of
54//! truth the sidecar is derived from; never stale).
55
56use std::collections::{BTreeSet, HashMap, HashSet, VecDeque};
57use std::io;
58use std::path::{Path, PathBuf};
59
60use ignore::WalkBuilder;
61use regex::Regex;
62
63use crate::index::IndexRecord;
64use crate::query::Query;
65use crate::store::{Layer, Store, StoreError};
66
67/// Which edge directions a traversal follows.
68#[derive(Debug, Clone, Copy, PartialEq, Eq)]
69pub enum Direction {
70    /// Incoming edges only (backlinks).
71    Incoming,
72    /// Outgoing edges only (forwardlinks).
73    Outgoing,
74    /// Both directions.
75    Both,
76}
77
78/// One node reached during a [`neighborhood`] hydration: the file, its
79/// `summary`, and how it connects back toward the seed.
80#[derive(Debug, Clone, PartialEq, Eq)]
81pub struct ContextNode {
82    /// The store-relative path of the reached file.
83    pub path: PathBuf,
84    /// The file's `summary` (read from its sidecar entry / frontmatter).
85    pub summary: String,
86    /// The file's `type`, when known.
87    pub type_: Option<String>,
88    /// Hop distance from the seed (the seed itself is 0).
89    pub hops: u32,
90    /// The relationship edge that brought this node into the slice: the path it
91    /// links to/from one hop closer to the seed, and the direction.
92    pub via: Option<(PathBuf, Direction)>,
93}
94
95/// The readable working-set digest [`neighborhood`] returns: the seed plus the
96/// reached nodes with their summaries and connections. The relationship-axis
97/// "turn a seed into context" primitive.
98#[derive(Debug, Clone, PartialEq, Eq)]
99pub struct ContextSlice {
100    /// The seed the slice was hydrated from.
101    pub seed: PathBuf,
102    /// The reached nodes (excluding the seed), in BFS order.
103    pub nodes: Vec<ContextNode>,
104}
105
106/// Incoming edges to `path`: files that wiki-link to it. The blast-radius /
107/// dependents primitive before an edit. Store-wide (every layer / every type);
108/// see [`backlinks_filtered`] for the `--type` / `--in`-scoped form.
109///
110/// `path` is the store-relative target as it would be written inside a
111/// wiki-link (with or without a trailing `.md`; both resolve to the same
112/// target). Returns each linking file as its **canonical bare wiki-link path**
113/// (store-relative, no `.md`) — the same key [`forwardlinks`] emits, so the two
114/// directions round-trip and [`neighborhood`] can use one node identity.
115/// Deduped, sorted, never including the seed itself.
116pub fn backlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
117    backlinks_filtered(store, path, &[], None)
118}
119
120/// Incoming edges to `path`, scoped by the linking file's `type` and/or layer —
121/// the `dbmd graph backlinks --type/--in` surface.
122///
123/// **Scale (the loop contract).** Two paths, by whether the call is scoped:
124///
125/// - **Unscoped** (`types` empty *and* `layer` `None`): one embedded-ripgrep
126///   pass for `[[<target>]]` across the store via [`Store::find_links_to`] — a
127///   single `grep` + `ignore` traversal with early-exit per file, never a
128///   `read_to_string` of every content file. This is the same scan engine
129///   [`crate::validate::validate_working_set`]'s incoming-linker step rides, and
130///   it keeps the unscoped call inside the loop budget (the old per-candidate
131///   confirm-read re-opened every file in the store → O(store)).
132/// - **Scoped** (`types` and/or `layer` set): the candidate set — the files that
133///   *might* link to `path` — is read from the relevant type-folder
134///   `index.jsonl` sidecars, so the call touches only the named folder(s):
135///   O(folder), the sanctioned loop cost. Each candidate is then confirmed by a
136///   single-file parse. When `types` lists several types, every named type's
137///   folder is read and the candidate sets unioned; a `layer` further restricts
138///   the candidate paths to that layer.
139///
140/// **Correctness (one edge set, both paths).** An incoming edge to X is exactly:
141/// some file whose [`forwardlinks`] contains X — a wiki-link in the body or in
142/// *any* frontmatter field (`company: [[…]]`, `attendees: [ … ]`), not just the
143/// sidecar's frontmatter `links:` projection. Both paths honor that:
144/// - The unscoped scan matches the literal `[[<target>]]` text wherever it lives
145///   in a file (frontmatter or body), the same edges [`forwardlinks`] extracts.
146///   [`Store::find_links_to`] returns *every* `.md` carrying the link text
147///   (including `index.md` catalogs); [`backlinks`] is the relationship view, so
148///   the results are filtered to content files ([`is_content_rel`]) and emitted
149///   as canonical bare targets, self-excluded.
150/// - The scoped path confirms each candidate via [`file_links_to`], which
151///   delegates to [`forwardlinks`] (body + every frontmatter field) — so a
152///   body-only or typed-field edge is caught, not just the sidecar's `links:`
153///   list.
154///
155/// Result form (canonical bare paths, deduped, sorted, seed excluded) is
156/// identical on both paths and matches [`backlinks`].
157pub fn backlinks_filtered(
158    store: &Store,
159    path: &Path,
160    types: &[String],
161    layer: Option<Layer>,
162) -> Result<Vec<PathBuf>, StoreError> {
163    let target = normalize_target(path);
164    if target.is_empty() {
165        return Ok(Vec::new());
166    }
167
168    // Unscoped: one embedded-ripgrep pass over the store (O(store) scan with
169    // early-exit per file), not a per-candidate read of every content file.
170    // `find_links_to` returns every `.md` carrying the link text (incl. catalog
171    // `index.md`); narrow to content files and canonicalize to the bare target
172    // form `backlinks` emits, dropping the seed's self-link.
173    if types.is_empty() && layer.is_none() {
174        let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
175        for rel in store.find_links_to(path)? {
176            if !is_content_rel(&rel) {
177                continue;
178            }
179            let linker = normalize_target(&rel);
180            if linker.is_empty() || linker == target {
181                // A file never counts as its own backlink.
182                continue;
183            }
184            hits.insert(PathBuf::from(linker));
185        }
186        return Ok(hits.into_iter().collect());
187    }
188
189    // Scoped: read only the named folder(s)' sidecars for the candidate set, then
190    // confirm each candidate with a single-file parse — O(folder), the I/O scope
191    // `--type` / `--in` buys.
192    let mut hits: BTreeSet<PathBuf> = BTreeSet::new();
193    for candidate in candidate_records(store, types, layer)? {
194        let rel = &candidate.path;
195        let candidate_target = normalize_target(rel);
196        if candidate_target.is_empty() || candidate_target == target {
197            // A file never counts as its own backlink.
198            continue;
199        }
200        // Confirm the edge by parsing the candidate file the same way
201        // forwardlinks does (body + all frontmatter), so body/typed-field links
202        // are caught — the sidecar's `links` field alone would miss them.
203        if file_links_to(store, rel, &target)? {
204            hits.insert(PathBuf::from(candidate_target));
205        }
206    }
207
208    Ok(hits.into_iter().collect())
209}
210
211/// Outgoing edges from `path`: the wiki-link targets extracted from that single
212/// file. Loop-fast; follow the evidence chain.
213///
214/// `path` is the store-relative path of the file to read. Targets are returned
215/// as store-relative paths (bare, no `.md`), deduped and sorted; the file's
216/// links to itself are dropped. A missing file yields an empty list (a
217/// dangling seed has no outgoing edges to report — broken-link detection is
218/// [`crate::validate`]'s job).
219pub fn forwardlinks(store: &Store, path: &Path) -> Result<Vec<PathBuf>, StoreError> {
220    let self_target = normalize_target(path);
221    let abs = match resolve_existing(store, path) {
222        Some(a) => a,
223        None => return Ok(Vec::new()),
224    };
225    let body = match std::fs::read_to_string(&abs) {
226        Ok(b) => b,
227        // A file that isn't valid UTF-8 (e.g. a binary source) carries no
228        // wiki-links we can extract.
229        Err(e) if e.kind() == io::ErrorKind::InvalidData => return Ok(Vec::new()),
230        Err(e) => return Err(StoreError::Io(e)),
231    };
232
233    let mut out: BTreeSet<PathBuf> = BTreeSet::new();
234    for target in extract_link_targets(&body) {
235        if target.is_empty() || target == self_target {
236            continue;
237        }
238        out.insert(PathBuf::from(target));
239    }
240    Ok(out.into_iter().collect())
241}
242
243/// The candidate set for an incoming-edge scan: the sidecar records that could
244/// link to the target, read from the type-folder `index.jsonl` sidecars (never
245/// a content-tree walk). `types`/`layer` narrow *which* sidecars are read — the
246/// I/O scope that keeps a typed/layer backlinks O(folder).
247///
248/// - `types` non-empty: read each type's folder sidecar (via [`Query`], which
249///   sits on [`Store::read_type_index`]), optionally layer-scoped, and union the
250///   records by path (a file appears once even if two type reads surface it).
251/// - `types` empty: every sidecar record under `layer` (or store-wide when
252///   `None`) via [`Store::sidecar_records`].
253fn candidate_records(
254    store: &Store,
255    types: &[String],
256    layer: Option<Layer>,
257) -> Result<Vec<IndexRecord>, StoreError> {
258    if types.is_empty() {
259        return store.sidecar_records(layer);
260    }
261    let mut by_path: std::collections::BTreeMap<PathBuf, IndexRecord> =
262        std::collections::BTreeMap::new();
263    for type_ in types {
264        let mut q = Query::new().with_type(type_);
265        if let Some(layer) = layer {
266            q = q.with_layer(layer);
267        }
268        for rec in q.execute(store)? {
269            by_path.insert(rec.path.clone(), rec);
270        }
271    }
272    Ok(by_path.into_values().collect())
273}
274
275/// True if the store file at `rel` carries a wiki-link whose canonical target
276/// equals `target`. Delegates to [`forwardlinks`] so the incoming-edge predicate
277/// is *exactly* the outgoing-edge extraction — body + every frontmatter field —
278/// keeping the two directions on one edge set. `forwardlinks` already emits
279/// canonical bare targets, so `target` (likewise normalized by the caller) is
280/// compared directly. A missing/binary file links to nothing.
281fn file_links_to(store: &Store, rel: &Path, target: &str) -> Result<bool, StoreError> {
282    let edges = forwardlinks(store, rel)?;
283    Ok(edges.iter().any(|e| e.as_os_str() == target))
284}
285
286/// **Context hydration.** Bounded BFS from `seed` over backlinks + forwardlinks
287/// out to `hops`, reading each reached file's `summary` + relationship, and
288/// returning a readable [`ContextSlice`]. Optionally filtered by `types` and
289/// `direction`. On-demand; no maintained graph. What the agent reaches for to
290/// assemble a working set in one call.
291///
292/// Traversal semantics:
293/// - **`hops`** bounds true graph distance from the seed. `hops == 0` returns
294///   an empty slice (the seed alone is no context).
295/// - **`direction`** selects which edges are followed: `Incoming` walks
296///   backlinks, `Outgoing` walks forwardlinks, `Both` walks the union.
297/// - **`types`**, when non-empty, filters which reached nodes appear in the
298///   slice — but traversal still passes *through* off-type nodes, so a
299///   `meeting` two hops out is still reachable through a `contact` even when
300///   filtering to `meeting`. (An empty `types` slice imposes no filter.)
301/// - Each node records the lowest hop count at which it is first reached (BFS
302///   order); the seed is never included as a node.
303pub fn neighborhood(
304    store: &Store,
305    seed: &Path,
306    hops: u32,
307    types: &[String],
308    direction: Direction,
309) -> Result<ContextSlice, StoreError> {
310    let seed_rel = PathBuf::from(normalize_target(seed));
311    let type_filter: HashSet<&str> = types.iter().map(|s| s.as_str()).collect();
312
313    // `discovered` guards against revisiting a node (and against re-adding the
314    // seed). BFS by levels so the first time we reach a node is its true min
315    // hop distance.
316    let mut discovered: HashSet<PathBuf> = HashSet::new();
317    discovered.insert(seed_rel.clone());
318
319    let mut nodes: Vec<ContextNode> = Vec::new();
320    let mut frontier: VecDeque<PathBuf> = VecDeque::new();
321    frontier.push_back(seed_rel.clone());
322
323    let mut hop = 0u32;
324    while hop < hops && !frontier.is_empty() {
325        hop += 1;
326        let level_size = frontier.len();
327        for _ in 0..level_size {
328            let current = frontier.pop_front().expect("frontier non-empty");
329
330            // Collect this node's edges in the requested direction(s). Each
331            // edge carries the neighbor path + the direction we traversed it.
332            let mut edges: Vec<(PathBuf, Direction)> = Vec::new();
333            if matches!(direction, Direction::Outgoing | Direction::Both) {
334                for nbr in forwardlinks(store, &current)? {
335                    edges.push((nbr, Direction::Outgoing));
336                }
337            }
338            if matches!(direction, Direction::Incoming | Direction::Both) {
339                for nbr in backlinks(store, &current)? {
340                    edges.push((nbr, Direction::Incoming));
341                }
342            }
343
344            for (neighbor, dir) in edges {
345                if !discovered.insert(neighbor.clone()) {
346                    continue;
347                }
348                let (summary, type_) = read_summary_and_type(store, &neighbor);
349                let include = type_filter.is_empty()
350                    || type_
351                        .as_deref()
352                        .map(|t| type_filter.contains(t))
353                        .unwrap_or(false);
354                if include {
355                    nodes.push(ContextNode {
356                        path: neighbor.clone(),
357                        summary,
358                        type_,
359                        hops: hop,
360                        via: Some((current.clone(), dir)),
361                    });
362                }
363                // Off-type nodes are not emitted but still seed the next BFS
364                // level, so the type filter narrows the *result*, not the
365                // reachable graph.
366                frontier.push_back(neighbor);
367            }
368        }
369    }
370
371    Ok(ContextSlice {
372        seed: seed_rel,
373        nodes,
374    })
375}
376
377/// **SWEEP.** Content files with no incoming AND no outgoing wiki-links — the
378/// curation worklist ("ingested but not yet wired into the wiki"). Off the
379/// loop. Optionally scoped to a layer.
380///
381/// A file is an orphan iff it neither links out to another store file nor is
382/// linked to by one. Incoming edges are counted across the *whole* store
383/// (a link from any layer un-orphans a file), even when `layer` scopes the
384/// candidate set. Returns store-relative paths, sorted.
385pub fn orphans(store: &Store, layer: Option<Layer>) -> Result<Vec<PathBuf>, StoreError> {
386    // One walk of the whole store: for every content file, record (a) whether
387    // it has any outgoing link, and (b) accumulate the set of every target any
388    // file links to (its incoming-edge set). Both come from a single read per
389    // file — the SWEEP cost.
390    let all = walk_content_files(store)?;
391
392    let mut linked_to: HashSet<PathBuf> = HashSet::new();
393    let mut has_outgoing: HashMap<PathBuf, bool> = HashMap::new();
394
395    for abs in &all {
396        let rel = match rel_path(store, abs) {
397            Some(r) => r,
398            None => continue,
399        };
400        let self_target = normalize_target(&rel);
401
402        let body = match std::fs::read_to_string(abs) {
403            Ok(b) => b,
404            Err(e) if e.kind() == io::ErrorKind::InvalidData => String::new(),
405            Err(e) => return Err(StoreError::Io(e)),
406        };
407
408        let mut outgoing = false;
409        for target in extract_link_targets(&body) {
410            if target.is_empty() || target == self_target {
411                continue;
412            }
413            if resolve_existing(store, Path::new(&target)).is_none() {
414                continue;
415            }
416            outgoing = true;
417            linked_to.insert(PathBuf::from(target));
418        }
419        has_outgoing.insert(rel, outgoing);
420    }
421
422    let mut out: BTreeSet<PathBuf> = BTreeSet::new();
423    for abs in &all {
424        let rel = match rel_path(store, abs) {
425            Some(r) => r,
426            None => continue,
427        };
428        if let Some(layer) = layer {
429            if path_layer(&rel) != Some(layer) {
430                continue;
431            }
432        }
433        let outgoing = has_outgoing.get(&rel).copied().unwrap_or(false);
434        let incoming = linked_to.contains(&PathBuf::from(normalize_target(&rel)));
435        if !outgoing && !incoming {
436            out.insert(rel);
437        }
438    }
439
440    Ok(out.into_iter().collect())
441}
442
443/// **Write-side.** Rewrite every incoming `[[old]]` wiki-link in `text` to
444/// `[[new]]`, preserving any `|display` override and emitting the canonical bare
445/// target (no `.md`). The write-side twin of [`backlinks`]: where `backlinks`
446/// *finds* the files carrying an edge to `old`, this *retargets* that edge to
447/// `new` inside one file's contents.
448///
449/// `old` and `new` are store-relative paths in the wiki-link sense — both are
450/// passed through the same [`normalize_target`] the read side keys on, so the
451/// `.md` and bare spellings of `old` collapse to one target and a match here is
452/// exactly a match [`backlinks`] / [`Store::find_links_to`](crate::Store::find_links_to)
453/// would report. A link is rewritten iff its normalized target equals
454/// `normalize_target(old)`; prefix collisions (`old=a/b` vs `[[a/bc]]`) and
455/// short-form links never match. Returns the rewritten text (identical to the
456/// input when nothing matched), so the caller can cheaply detect a no-op.
457///
458/// Operates on the raw bytes (not a parser round-trip) so a link in frontmatter
459/// or body is retargeted uniformly and nothing else is reflowed.
460pub fn rewrite_links_to(text: &str, old: &Path, new: &Path) -> String {
461    let old_target = normalize_target(old);
462    let new_target = normalize_target(new);
463    if old_target.is_empty() {
464        // No target to match → never rewrite anything.
465        return text.to_string();
466    }
467
468    let re = rewrite_link_re();
469    let mut out = String::with_capacity(text.len());
470    let mut last = 0usize;
471    for caps in re.captures_iter(text) {
472        let whole = caps.get(0).expect("group 0 always present");
473        let raw_target = caps.get(1).map(|m| m.as_str()).unwrap_or("");
474        // Match on the SAME normalized key the read side uses, so `[[old]]`,
475        // `[[old.md]]`, and `[[./old]]` all retarget and a prefix like
476        // `[[old-jr]]` never does.
477        if normalize_target(Path::new(raw_target)) != old_target {
478            continue;
479        }
480        // Copy the gap since the previous rewrite verbatim, then the rebuilt
481        // link: canonical bare new target + the original display, if any.
482        out.push_str(&text[last..whole.start()]);
483        out.push_str("[[");
484        out.push_str(&new_target);
485        if let Some(display) = caps.get(2) {
486            out.push('|');
487            out.push_str(display.as_str());
488        }
489        out.push_str("]]");
490        last = whole.end();
491    }
492    out.push_str(&text[last..]);
493    out
494}
495
496// ── Private helpers ─────────────────────────────────────────────────────────
497
498/// The wiki-link regex used by the **write side** ([`rewrite_links_to`]):
499/// captures the target (group 1) AND the optional `|display` (group 2) so a
500/// rewrite can re-emit the display verbatim. The target/display character
501/// classes match [`wiki_link_re`] exactly, so the write side recognizes a link
502/// iff the read side does — the two never disagree on what a `[[…]]` is.
503///
504/// Compiled once for the process via [`OnceLock`] — `rewrite_links_to` runs on
505/// the rename/link write path, so recompiling per call would be wasteful.
506fn rewrite_link_re() -> &'static Regex {
507    static RE: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
508    RE.get_or_init(|| {
509        Regex::new(r"\[\[([^\]\|\n]+?)(?:\|([^\]\n]*))?\]\]")
510            .expect("static wiki-link rewrite regex compiles")
511    })
512}
513
514/// Normalize a store-relative path into the canonical wiki-link target form:
515/// forward slashes, no leading `./` or `/`, and no trailing `.md`. This is the
516/// single key that incoming/outgoing edges are compared on, so the `.md` and
517/// bare forms of a target unify.
518fn normalize_target(path: &Path) -> String {
519    let mut s = path.to_string_lossy().replace('\\', "/");
520    while let Some(rest) = s.strip_prefix("./") {
521        s = rest.to_string();
522    }
523    let s = s.trim_start_matches('/');
524    let s = s.strip_suffix(".md").unwrap_or(s);
525    s.trim().to_string()
526}
527
528/// The wiki-link regex: `[[target]]` / `[[target|display]]`. Captures the raw
529/// target (group 1). Compiled once for the process via [`OnceLock`] —
530/// `extract_link_targets` runs on the O(changed) loop path (backlinks /
531/// forwardlinks / neighborhood), so the compile must not repeat per call.
532fn wiki_link_re() -> &'static Regex {
533    // target = anything up to the first `|` or `]`. display (optional) is
534    // discarded. Matches across a single line/body slice.
535    static RE: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
536    RE.get_or_init(|| {
537        Regex::new(r"\[\[([^\]\|\n]+?)(?:\|[^\]\n]*)?\]\]")
538            .expect("static wiki-link regex compiles")
539    })
540}
541
542/// Extract every wiki-link target from a body, normalized to the canonical
543/// store-relative form. Order-preserving; duplicates kept (callers dedup).
544fn extract_link_targets(body: &str) -> Vec<String> {
545    let re = wiki_link_re();
546    re.captures_iter(body)
547        .filter_map(|c| c.get(1))
548        .map(|m| normalize_target(Path::new(m.as_str().trim())))
549        .filter(|t| !t.is_empty())
550        .collect()
551}
552
553/// Resolve the store root + a store-relative path to the absolute on-disk file,
554/// trying the path as written and then with a `.md` extension. `None` if
555/// neither exists.
556fn resolve_existing(store: &Store, store_relative: &Path) -> Option<PathBuf> {
557    let direct = store.root.join(store_relative);
558    if direct.is_file() {
559        return Some(direct);
560    }
561    let normalized = normalize_target(store_relative);
562    let with_md = store.root.join(format!("{normalized}.md"));
563    if with_md.is_file() {
564        return Some(with_md);
565    }
566    None
567}
568
569/// Convert an absolute path under the store root into its store-relative form.
570fn rel_path(store: &Store, abs: &Path) -> Option<PathBuf> {
571    abs.strip_prefix(&store.root).ok().map(|p| p.to_path_buf())
572}
573
574/// Which layer a store-relative path sits in, by its first component.
575fn path_layer(rel: &Path) -> Option<Layer> {
576    let first = rel.components().next()?;
577    match first.as_os_str().to_str()? {
578        "sources" => Some(Layer::Sources),
579        "records" => Some(Layer::Records),
580        "wiki" => Some(Layer::Wiki),
581        _ => None,
582    }
583}
584
585/// True if a store-relative path is a *content* file: under `sources/`,
586/// `records/`, or `wiki/`, a `.md` file, and not an `index.md`. Meta files
587/// (`DB.md`, `log.md`, `log/…`, sidecars) are excluded.
588fn is_content_rel(rel: &Path) -> bool {
589    if path_layer(rel).is_none() {
590        return false;
591    }
592    match rel.extension().and_then(|e| e.to_str()) {
593        Some("md") => {}
594        _ => return false,
595    }
596    rel.file_name().and_then(|n| n.to_str()) != Some("index.md")
597}
598
599/// Walk every content `.md` file in the store via the **`ignore`** walker
600/// (the ripgrep directory engine). Only the three layer roots
601/// (`sources/`/`records/`/`wiki/`) are descended, so `DB.md`, `log.md`, and
602/// `log/` at the store root are structurally never reached; hidden dirs and
603/// per-folder `index.md` sidecars are filtered out ([`is_content_rel`]). Honors
604/// `.gitignore` the way `rg` does. Returns absolute paths. SWEEP-class.
605fn walk_content_files(store: &Store) -> Result<Vec<PathBuf>, StoreError> {
606    let mut out = Vec::new();
607    for layer in Layer::all() {
608        let dir = store.root.join(layer_dir_name(layer));
609        if !dir.is_dir() {
610            continue;
611        }
612        let walker = WalkBuilder::new(&dir)
613            .hidden(true)
614            .git_ignore(true)
615            .git_global(false)
616            .require_git(false)
617            .build();
618        for result in walker {
619            let entry = result.map_err(|e| StoreError::Search {
620                root: store.root.clone(),
621                message: format!("walk failed: {e}"),
622            })?;
623            if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
624                continue;
625            }
626            let abs = entry.into_path();
627            if let Some(rel) = rel_path(store, &abs) {
628                if is_content_rel(&rel) {
629                    out.push(abs);
630                }
631            }
632        }
633    }
634    Ok(out)
635}
636
637/// The on-disk folder name for a layer. Mirrors `Layer::dir_name`; kept local
638/// so the graph module owns its own copy rather than coupling to that body.
639fn layer_dir_name(layer: Layer) -> &'static str {
640    match layer {
641        Layer::Sources => "sources",
642        Layer::Records => "records",
643        Layer::Wiki => "wiki",
644    }
645}
646
647/// Read a reached node's `summary` and `type` from its frontmatter. A missing
648/// file, missing frontmatter, or unparseable YAML degrades to an empty summary
649/// / unknown type rather than failing the whole hydration — `neighborhood` is
650/// best-effort context assembly, not validation.
651fn read_summary_and_type(store: &Store, rel: &Path) -> (String, Option<String>) {
652    let abs = match resolve_existing(store, rel) {
653        Some(a) => a,
654        None => return (String::new(), None),
655    };
656    let text = match std::fs::read_to_string(&abs) {
657        Ok(t) => t,
658        Err(_) => return (String::new(), None),
659    };
660    let yaml = match frontmatter_block(&text) {
661        Some(y) => y,
662        None => return (String::new(), None),
663    };
664    let value: serde_norway::Value = match serde_norway::from_str(yaml) {
665        Ok(v) => v,
666        Err(_) => return (String::new(), None),
667    };
668    let summary = value
669        .get("summary")
670        .and_then(|v| v.as_str())
671        .unwrap_or("")
672        .to_string();
673    let type_ = value
674        .get("type")
675        .and_then(|v| v.as_str())
676        .map(|s| s.to_string());
677    (summary, type_)
678}
679
680/// Return the YAML between the opening and closing `---` fences (exclusive), or
681/// `None` if the text has no leading frontmatter block. Local mirror of the
682/// parser's split so the graph module stays self-contained.
683fn frontmatter_block(text: &str) -> Option<&str> {
684    let rest = text
685        .strip_prefix("---\n")
686        .or_else(|| text.strip_prefix("---\r\n"))?;
687    // Find the closing fence: a line that is exactly `---`.
688    let mut idx = 0usize;
689    for line in rest.split_inclusive('\n') {
690        let trimmed = line.trim_end_matches(['\r', '\n']);
691        if trimmed == "---" {
692            return Some(&rest[..idx]);
693        }
694        idx += line.len();
695    }
696    None
697}
698
699#[cfg(test)]
700mod tests {
701    use super::*;
702    use std::fs;
703    use tempfile::TempDir;
704
705    use crate::parser::Config;
706
707    // ── Fixture builder ─────────────────────────────────────────────────────
708    //
709    // A real on-disk store in a tempdir. We write actual files (frontmatter +
710    // wiki-links) and exercise the real code paths. The fixture constructs the
711    // `Store` by its public fields rather than `Store::open`, so the graph
712    // tests stand on their own and do not depend on any other module's
713    // behavior. Each test asserts the behavior the SPEC promises, derived from
714    // intent, never from echoing the function's own output.
715    //
716    // `backlinks` (and `neighborhood` in any incoming direction) enumerate their
717    // candidate set from the type-folder `index.jsonl` sidecars — the loop
718    // contract: never a whole-store content walk. A real db.md store maintains
719    // those sidecars write-through, so a test that exercises backlinks must call
720    // [`Fixture::reindex`] after writing its files to build them (the SWEEP that
721    // `dbmd index rebuild` runs). Forwardlinks/orphans read content directly and
722    // need no sidecar.
723
724    struct Fixture {
725        _tmp: TempDir,
726        store: Store,
727    }
728
729    impl Fixture {
730        fn new() -> Self {
731            let tmp = TempDir::new().expect("tempdir");
732            let root = tmp.path().to_path_buf();
733            fs::write(root.join("DB.md"), "---\ntype: db-md\n---\n# store\n").expect("DB.md");
734            let store = Store {
735                root,
736                config: Config::default(),
737            };
738            Fixture { _tmp: tmp, store }
739        }
740
741        /// Write a content file at a store-relative path with the given type,
742        /// summary, and body. Creates parent dirs.
743        fn write(&self, rel: &str, type_: &str, summary: &str, body: &str) {
744            let abs = self.store.root.join(rel);
745            fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
746            let contents = format!(
747                "---\ntype: {type_}\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: {summary}\n---\n{body}\n"
748            );
749            fs::write(&abs, contents).expect("write file");
750        }
751
752        /// Write a raw file verbatim (for frontmatter-shape edge cases).
753        fn write_raw(&self, rel: &str, contents: &str) {
754            let abs = self.store.root.join(rel);
755            fs::create_dir_all(abs.parent().unwrap()).expect("mkdir");
756            fs::write(&abs, contents).expect("write raw");
757        }
758
759        /// Build the type-folder `index.jsonl` sidecars from the content written
760        /// so far — the state a real store is always in (write-through), and the
761        /// candidate set `backlinks` reads. Call after writing files in any test
762        /// that exercises `backlinks` or an incoming-direction `neighborhood`.
763        fn reindex(&self) {
764            crate::index::Index::rebuild_all(&self.store).expect("rebuild sidecars");
765        }
766
767        fn p(&self, rel: &str) -> PathBuf {
768            PathBuf::from(rel)
769        }
770    }
771
772    fn paths(v: &[PathBuf]) -> Vec<String> {
773        v.iter()
774            .map(|p| p.to_string_lossy().replace('\\', "/"))
775            .collect()
776    }
777
778    // ── normalize_target ────────────────────────────────────────────────────
779
780    #[test]
781    fn normalize_strips_md_and_leading_dotslash() {
782        assert_eq!(
783            normalize_target(Path::new("records/contacts/sarah.md")),
784            "records/contacts/sarah"
785        );
786        assert_eq!(
787            normalize_target(Path::new("./wiki/people/elena")),
788            "wiki/people/elena"
789        );
790        assert_eq!(normalize_target(Path::new("/records/x")), "records/x");
791        // Bare and `.md` forms must collapse to the same key, or edges won't unify.
792        assert_eq!(
793            normalize_target(Path::new("a/b")),
794            normalize_target(Path::new("a/b.md"))
795        );
796    }
797
798    // ── extract_link_targets (forwardlinks core) ────────────────────────────
799
800    #[test]
801    fn extract_handles_display_text_and_md_suffix() {
802        let body = "See [[wiki/people/sarah-chen|Sarah]] and [[records/contacts/elena.md]].";
803        let got = extract_link_targets(body);
804        assert_eq!(
805            got,
806            vec!["wiki/people/sarah-chen", "records/contacts/elena"]
807        );
808    }
809
810    #[test]
811    fn extract_ignores_external_markdown_links() {
812        // Standard markdown links are NOT wiki-links and must not be extracted
813        // (SPEC: external refs don't participate in the graph).
814        let body = "[Acme](https://acme.io) but [[records/companies/acme]] is internal.";
815        let got = extract_link_targets(body);
816        assert_eq!(got, vec!["records/companies/acme"]);
817    }
818
819    #[test]
820    fn extract_display_text_is_not_treated_as_a_target() {
821        // A `|display` segment that looks path-like must not become a target;
822        // only the part before `|` is the link target.
823        let body = "[[records/contacts/sarah|sources/emails/decoy]]";
824        let got = extract_link_targets(body);
825        assert_eq!(got, vec!["records/contacts/sarah"]);
826    }
827
828    // ── rewrite_links_to (write-side twin of backlinks) ─────────────────────
829
830    #[test]
831    fn rewrite_plain_link_to_canonical_new_target() {
832        let got = rewrite_links_to(
833            "See [[records/contacts/sarah-chen]] today.",
834            Path::new("records/contacts/sarah-chen"),
835            Path::new("records/contacts/sarah-chen-acme"),
836        );
837        assert_eq!(got, "See [[records/contacts/sarah-chen-acme]] today.");
838    }
839
840    #[test]
841    fn rewrite_preserves_display_override() {
842        let got = rewrite_links_to(
843            "With [[records/contacts/sarah-chen|Sarah]].",
844            Path::new("records/contacts/sarah-chen"),
845            Path::new("records/contacts/sarah-chen-acme"),
846        );
847        assert_eq!(got, "With [[records/contacts/sarah-chen-acme|Sarah]].");
848    }
849
850    #[test]
851    fn rewrite_matches_md_suffixed_old_and_emits_bare_new() {
852        // The `.md` spelling of the old target must match (it normalizes to the
853        // same key the read side uses), and the new target is emitted bare —
854        // the writer doctrine validate enforces (`WIKI_LINK_HAS_EXTENSION`).
855        let got = rewrite_links_to(
856            "[[records/contacts/sarah-chen.md]]",
857            Path::new("records/contacts/sarah-chen"),
858            Path::new("records/contacts/new.md"),
859        );
860        assert_eq!(got, "[[records/contacts/new]]");
861    }
862
863    #[test]
864    fn rewrite_leaves_prefix_collisions_and_short_form_untouched() {
865        // Boundary correctness, anchored to the SAME normalize_target the read
866        // side keys on: `records/contacts/sarah-chen` must NOT match the longer
867        // `[[…-jr]]`, the short-form `[[sarah-chen]]`, or an unrelated target.
868        let input = "[[records/contacts/sarah-chen-jr]] [[sarah-chen]] [[wiki/topics/x]]";
869        let got = rewrite_links_to(
870            input,
871            Path::new("records/contacts/sarah-chen"),
872            Path::new("records/contacts/new"),
873        );
874        assert_eq!(got, input, "no genuine edge to the seed → text unchanged");
875    }
876
877    #[test]
878    fn rewrite_handles_multiple_occurrences_and_mixed_spellings() {
879        let got = rewrite_links_to(
880            "[[records/x]] then [[./records/x]] and [[records/x.md|d]] end",
881            Path::new("records/x"),
882            Path::new("records/y"),
883        );
884        // All three spellings of the same target retarget; the display survives.
885        assert_eq!(
886            got,
887            "[[records/y]] then [[records/y]] and [[records/y|d]] end"
888        );
889    }
890
891    #[test]
892    fn rewrite_retargets_exactly_the_edges_the_core_parser_sees() {
893        // The load-bearing property of moving the rewrite into core: the write
894        // side must operate on EXACTLY the edge set the read side recognizes —
895        // the same `extract_link_targets` / `normalize_target` grammar that
896        // `forwardlinks` is built on. Anchor the test to that grammar (via
897        // `forwardlinks` on a real file) rather than re-listing literals, so a
898        // future divergence between the read parser and the write rewrite fails
899        // here. (Coupled to `forwardlinks` — the single-file edge extractor —
900        // not the multi-file `backlinks` traversal, so it tests the grammar, not
901        // the walk.)
902        let fx = Fixture::new();
903        let body = "Met [[records/contacts/sarah.md|Sarah]] and not [[records/contacts/sarah-2]].";
904        fx.write("wiki/people/bio.md", "wiki-page", "bio", body);
905
906        // Read side: the parser sees two outgoing edges, both in canonical bare
907        // form (the `.md` spelling collapsed). `sarah` is a real edge here.
908        let edges = forwardlinks(&fx.store, &fx.p("wiki/people/bio.md")).unwrap();
909        assert_eq!(
910            paths(&edges),
911            vec!["records/contacts/sarah", "records/contacts/sarah-2"],
912            "fixture must contain exactly the two edges this test reasons about"
913        );
914
915        // Write side: rewriting `sarah → sarah-chen` must retarget the edge the
916        // parser recognized (matching the `.md` spelling), preserve the display,
917        // and leave the unrelated `sarah-2` edge untouched.
918        let got = rewrite_links_to(
919            body,
920            Path::new("records/contacts/sarah"),
921            Path::new("records/contacts/sarah-chen"),
922        );
923        assert_eq!(
924            got,
925            "Met [[records/contacts/sarah-chen|Sarah]] and not [[records/contacts/sarah-2]]."
926        );
927
928        // Cross-check through the parser: the rewritten text's edge set is the
929        // original with `sarah` swapped for `sarah-chen` — proving the rewrite
930        // moved exactly one edge, the one the read side keyed on.
931        fx.write("wiki/people/bio.md", "wiki-page", "bio", &got);
932        let after = forwardlinks(&fx.store, &fx.p("wiki/people/bio.md")).unwrap();
933        assert_eq!(
934            paths(&after),
935            vec!["records/contacts/sarah-2", "records/contacts/sarah-chen"],
936            "after rewrite the parser must see the new target and not the old"
937        );
938    }
939
940    #[test]
941    fn rewrite_empty_old_target_is_a_no_op() {
942        // A degenerate `old` (normalizes to empty) must never rewrite anything,
943        // mirroring backlinks' empty-target guard.
944        let input = "[[records/x]] [[]] text";
945        let got = rewrite_links_to(input, Path::new(""), Path::new("records/y"));
946        assert_eq!(got, input);
947    }
948
949    #[test]
950    fn rewrite_no_match_returns_input_unchanged() {
951        let input = "no links, [external](https://x), and [[wiki/topics/y]]";
952        let got = rewrite_links_to(input, Path::new("records/x"), Path::new("records/z"));
953        assert_eq!(got, input);
954    }
955
956    // ── forwardlinks ─────────────────────────────────────────────────────────
957
958    #[test]
959    fn forwardlinks_returns_sorted_deduped_targets_excluding_self() {
960        let fx = Fixture::new();
961        fx.write(
962            "wiki/projects/renewal.md",
963            "wiki-page",
964            "Renewal project",
965            "Links: [[records/contacts/sarah]] [[records/companies/acme]] [[records/contacts/sarah]] and itself [[wiki/projects/renewal]].",
966        );
967        // The targets need not exist on disk for forwardlinks (it reads the one
968        // file only). Self-links are dropped; duplicates collapse; sorted asc.
969        let got = forwardlinks(&fx.store, &fx.p("wiki/projects/renewal.md")).unwrap();
970        assert_eq!(
971            paths(&got),
972            vec!["records/companies/acme", "records/contacts/sarah"]
973        );
974    }
975
976    #[test]
977    fn forwardlinks_picks_up_wiki_links_in_frontmatter() {
978        // SPEC: wiki-links appear in scalar + block-sequence frontmatter fields,
979        // not just the body. forwardlinks must follow those edges too.
980        let fx = Fixture::new();
981        fx.write_raw(
982            "records/meetings/m1.md",
983            "---\ntype: meeting\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Renewal sync\ncompany: [[records/companies/acme]]\nattendees:\n  - [[records/contacts/sarah]]\n  - [[records/contacts/elena]]\n---\nNotes about [[wiki/projects/renewal]].\n",
984        );
985        let got = forwardlinks(&fx.store, &fx.p("records/meetings/m1.md")).unwrap();
986        assert_eq!(
987            paths(&got),
988            vec![
989                "records/companies/acme",
990                "records/contacts/elena",
991                "records/contacts/sarah",
992                "wiki/projects/renewal",
993            ]
994        );
995    }
996
997    #[test]
998    fn forwardlinks_missing_file_is_empty_not_error() {
999        let fx = Fixture::new();
1000        let got = forwardlinks(&fx.store, &fx.p("wiki/people/ghost.md")).unwrap();
1001        assert!(got.is_empty());
1002    }
1003
1004    #[test]
1005    fn forwardlinks_resolves_seed_given_without_md_extension() {
1006        let fx = Fixture::new();
1007        fx.write(
1008            "wiki/people/sarah.md",
1009            "wiki-page",
1010            "Sarah bio",
1011            "Works at [[records/companies/acme]].",
1012        );
1013        // Seed passed in bare wiki-link form (no `.md`) must still resolve.
1014        let got = forwardlinks(&fx.store, &fx.p("wiki/people/sarah")).unwrap();
1015        assert_eq!(paths(&got), vec!["records/companies/acme"]);
1016    }
1017
1018    // ── backlinks ──────────────────────────────────────────────────────────
1019
1020    #[test]
1021    fn backlinks_finds_incoming_across_layers_and_link_forms() {
1022        let fx = Fixture::new();
1023        // Target.
1024        fx.write("records/contacts/sarah.md", "contact", "Sarah Chen", "");
1025        // Three different incoming-link spellings, all to the same target.
1026        fx.write(
1027            "wiki/people/sarah.md",
1028            "wiki-page",
1029            "bio",
1030            "See [[records/contacts/sarah]].",
1031        );
1032        fx.write(
1033            "records/meetings/m1.md",
1034            "meeting",
1035            "Renewal call",
1036            "Attendee [[records/contacts/sarah|Sarah]].",
1037        );
1038        fx.write(
1039            "sources/emails/e1.md",
1040            "email",
1041            "Hi",
1042            "From [[records/contacts/sarah.md]] today.",
1043        );
1044        // A file that links to a DIFFERENT contact must not be a backlink.
1045        fx.write(
1046            "wiki/people/other.md",
1047            "wiki-page",
1048            "x",
1049            "[[records/contacts/sarah-2]]",
1050        );
1051        fx.reindex();
1052
1053        // All three link forms ([[x]], [[x|d]], [[x.md]]) resolve to the same
1054        // target and are found; the linkers are returned in canonical bare form.
1055        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1056        assert_eq!(
1057            paths(&got),
1058            vec![
1059                "records/meetings/m1",
1060                "sources/emails/e1",
1061                "wiki/people/sarah",
1062            ]
1063        );
1064    }
1065
1066    #[test]
1067    fn backlinks_and_forwardlinks_round_trip_on_same_key() {
1068        // If A forwardlinks to B, then B backlinks to A — both expressed in the
1069        // identical bare key, so neighborhood can dedup across directions.
1070        let fx = Fixture::new();
1071        fx.write(
1072            "wiki/people/a.md",
1073            "wiki-page",
1074            "A",
1075            "Knows [[wiki/people/b]].",
1076        );
1077        fx.write("wiki/people/b.md", "wiki-page", "B", "");
1078        fx.reindex();
1079        let fwd = forwardlinks(&fx.store, &fx.p("wiki/people/a.md")).unwrap();
1080        let back = backlinks(&fx.store, &fx.p("wiki/people/b.md")).unwrap();
1081        assert_eq!(paths(&fwd), vec!["wiki/people/b"]);
1082        assert_eq!(paths(&back), vec!["wiki/people/a"]);
1083    }
1084
1085    #[test]
1086    fn backlinks_does_not_match_path_prefix_collisions() {
1087        let fx = Fixture::new();
1088        fx.write("records/contacts/sam.md", "contact", "Sam", "");
1089        // `sam-smith` shares the `sam` prefix; must NOT count as a backlink to `sam`.
1090        fx.write(
1091            "wiki/people/x.md",
1092            "wiki-page",
1093            "x",
1094            "[[records/contacts/sam-smith]]",
1095        );
1096        // The genuine backlink.
1097        fx.write(
1098            "wiki/people/y.md",
1099            "wiki-page",
1100            "y",
1101            "[[records/contacts/sam]]",
1102        );
1103        fx.reindex();
1104
1105        let got = backlinks(&fx.store, &fx.p("records/contacts/sam")).unwrap();
1106        assert_eq!(paths(&got), vec!["wiki/people/y"]);
1107    }
1108
1109    #[test]
1110    fn backlinks_excludes_self_reference() {
1111        let fx = Fixture::new();
1112        // A page that links to itself is not its own backlink.
1113        fx.write(
1114            "wiki/synthesis/overview.md",
1115            "wiki-page",
1116            "Overview",
1117            "This page [[wiki/synthesis/overview]] references itself.",
1118        );
1119        fx.reindex();
1120        let got = backlinks(&fx.store, &fx.p("wiki/synthesis/overview.md")).unwrap();
1121        assert!(
1122            got.is_empty(),
1123            "self-link must not appear as a backlink, got {got:?}"
1124        );
1125    }
1126
1127    #[test]
1128    fn backlinks_empty_when_nobody_links() {
1129        let fx = Fixture::new();
1130        fx.write("records/contacts/lonely.md", "contact", "Lonely", "");
1131        fx.write(
1132            "wiki/people/unrelated.md",
1133            "wiki-page",
1134            "x",
1135            "[[records/companies/acme]]",
1136        );
1137        fx.reindex();
1138        let got = backlinks(&fx.store, &fx.p("records/contacts/lonely.md")).unwrap();
1139        assert!(got.is_empty());
1140    }
1141
1142    #[test]
1143    fn backlinks_ignores_index_and_meta_files() {
1144        let fx = Fixture::new();
1145        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1146        // An index.md that lists the target must NOT be reported as a backlink
1147        // (indexes are catalog, not relationship edges).
1148        fx.write_raw(
1149            "records/contacts/index.md",
1150            "---\ntype: index\nscope: folder\nfolder: records/contacts\n---\n- [[records/contacts/sarah]] — Sarah\n",
1151        );
1152        fx.reindex();
1153        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1154        assert!(got.is_empty(), "index.md must be excluded, got {got:?}");
1155    }
1156
1157    #[test]
1158    fn backlinks_finds_body_only_edge_not_in_frontmatter_links_field() {
1159        // REGRESSION: the sidecar's `links` field carries only the file's
1160        // frontmatter `links:` list; it does NOT include wiki-links written in
1161        // the body or in other typed frontmatter fields. Answering backlinks
1162        // from `links[]` alone would silently miss this edge. The candidate set
1163        // is sidecar-bounded, but each candidate's edge is confirmed by parsing
1164        // the file (the same extraction forwardlinks uses), so a body-only link
1165        // must still register as a backlink.
1166        let fx = Fixture::new();
1167        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1168        // `meeting.md` links to sarah ONLY in its body — its frontmatter has no
1169        // `links:` field at all, so the sidecar record's `links` is empty.
1170        fx.write(
1171            "records/meetings/standup.md",
1172            "meeting",
1173            "Standup",
1174            "Discussed renewal with [[records/contacts/sarah]].",
1175        );
1176        fx.reindex();
1177
1178        // Guard the premise: the sidecar record really does carry an empty
1179        // `links` (so this test fails loudly if the index ever starts extracting
1180        // body links — at which point the backlink predicate could be revisited).
1181        let rec = fx
1182            .store
1183            .find_by_type("meeting")
1184            .unwrap()
1185            .into_iter()
1186            .find(|r| r.path == fx.p("records/meetings/standup.md"))
1187            .expect("meeting is catalogued in its sidecar");
1188        assert!(
1189            rec.links.is_empty(),
1190            "premise: the body link is NOT projected into the sidecar `links` field; got {:?}",
1191            rec.links
1192        );
1193
1194        // Yet backlinks still finds it — because it confirms via the file parse,
1195        // not via the sidecar `links` field.
1196        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1197        assert_eq!(
1198            paths(&got),
1199            vec!["records/meetings/standup"],
1200            "a body-only wiki-link must register as a backlink"
1201        );
1202    }
1203
1204    #[test]
1205    fn backlinks_finds_edge_in_typed_frontmatter_field() {
1206        // A wiki-link inside a *typed* frontmatter field (`company:`) is a real
1207        // edge forwardlinks follows, so backlinks must find it too — even though
1208        // the sidecar's `links` field (the `links:` key only) does not list it.
1209        let fx = Fixture::new();
1210        fx.write("records/companies/acme.md", "company", "Acme", "");
1211        fx.write_raw(
1212            "records/contacts/sarah.md",
1213            "---\ntype: contact\ncreated: 2026-05-01T00:00:00Z\nupdated: 2026-05-01T00:00:00Z\nsummary: Sarah\ncompany: [[records/companies/acme]]\n---\nBody with no links.\n",
1214        );
1215        fx.reindex();
1216        let got = backlinks(&fx.store, &fx.p("records/companies/acme.md")).unwrap();
1217        assert_eq!(
1218            paths(&got),
1219            vec!["records/contacts/sarah"],
1220            "a wiki-link in a typed frontmatter field is an incoming edge"
1221        );
1222    }
1223
1224    #[test]
1225    fn backlinks_unscoped_scans_the_tree_not_only_the_sidecar() {
1226        // REGRESSION (loop budget): an UNSCOPED `backlinks` must resolve incoming
1227        // edges with a SINGLE embedded-ripgrep pass over the tree
1228        // (`Store::find_links_to`), NOT by reading the sidecar candidate set and
1229        // then `read_to_string`-confirming each candidate (which re-opens every
1230        // content file → O(store); the documented >3x budget miss). A ripgrep
1231        // pass is the same scan engine `validate`/`rename`/`dbmd links` ride, and
1232        // the tree — not the sidecar — is its ground truth: a linker that is on
1233        // disk but absent from every sidecar (stale / never-built index) is still
1234        // found. We assert that behaviorally, which fails loudly if the unscoped
1235        // path ever reverts to the sidecar-bounded per-candidate confirm loop
1236        // (that loop would NOT find the unindexed linker).
1237        let fx = Fixture::new();
1238        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1239        fx.write(
1240            "wiki/people/indexed.md",
1241            "wiki-page",
1242            "Indexed",
1243            "[[records/contacts/sarah]]",
1244        );
1245        fx.reindex(); // builds sidecars for sarah + the indexed linker
1246
1247        // Now drop a NEW linker on disk WITHOUT reindexing — it is on disk but in
1248        // no sidecar.
1249        fx.write(
1250            "wiki/people/unindexed.md",
1251            "wiki-page",
1252            "Unindexed",
1253            "[[records/contacts/sarah]]",
1254        );
1255
1256        let got = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1257        assert_eq!(
1258            paths(&got),
1259            vec!["wiki/people/indexed", "wiki/people/unindexed"],
1260            "unscoped backlinks ripgrep-scans the tree, so the on-disk-but-unindexed \
1261             linker is found too — not only the sidecar-catalogued one"
1262        );
1263    }
1264
1265    #[test]
1266    fn backlinks_scoped_candidates_come_from_the_sidecar_not_a_tree_walk() {
1267        // REGRESSION (scale contract): the SCOPED form (`--type` / `--in`) is the
1268        // I/O-scoped path — it enumerates candidates from the relevant type-folder
1269        // `index.jsonl` sidecars and parses only those, NOT a whole-tree walk.
1270        // That is what makes the scope an I/O scope, not just a result filter:
1271        // a linker that is on disk but ABSENT from the sidecar (stale / never-built
1272        // index) is NOT discovered by the scoped call (the sidecar bounds which
1273        // files are candidates). This is the loop-vs-walk distinction the SPEC
1274        // draws, and it is exactly the inverse of the unscoped tree scan above.
1275        let fx = Fixture::new();
1276        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1277        fx.write(
1278            "wiki/people/indexed.md",
1279            "wiki-page",
1280            "Indexed",
1281            "[[records/contacts/sarah]]",
1282        );
1283        fx.reindex(); // builds sidecars for sarah + the indexed linker
1284
1285        // Drop a NEW wiki-page linker on disk WITHOUT reindexing — on disk, in no
1286        // sidecar.
1287        fx.write(
1288            "wiki/people/unindexed.md",
1289            "wiki-page",
1290            "Unindexed",
1291            "[[records/contacts/sarah]]",
1292        );
1293
1294        // Scoped to the `wiki-page` type: the candidate set is the sidecar's, so
1295        // only the catalogued linker is found — the unindexed one is invisible.
1296        let only_wiki_pages = vec!["wiki-page".to_string()];
1297        let got = backlinks_filtered(
1298            &fx.store,
1299            &fx.p("records/contacts/sarah.md"),
1300            &only_wiki_pages,
1301            None,
1302        )
1303        .unwrap();
1304        assert_eq!(
1305            paths(&got),
1306            vec!["wiki/people/indexed"],
1307            "scoped backlinks reads the sidecar candidate set; the on-disk-but-unindexed \
1308             linker is not tree-walked"
1309        );
1310    }
1311
1312    #[test]
1313    fn backlinks_filtered_type_scopes_the_candidate_set() {
1314        // `--type` narrows backlinks to linkers of that type. Two files link to
1315        // the target — one `meeting`, one `wiki-page`; filtering to `meeting`
1316        // returns only the meeting.
1317        let fx = Fixture::new();
1318        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1319        fx.write(
1320            "records/meetings/m1.md",
1321            "meeting",
1322            "Call",
1323            "[[records/contacts/sarah]]",
1324        );
1325        fx.write(
1326            "wiki/people/bio.md",
1327            "wiki-page",
1328            "Bio",
1329            "[[records/contacts/sarah]]",
1330        );
1331        fx.reindex();
1332
1333        let only_meetings = vec!["meeting".to_string()];
1334        let got = backlinks_filtered(
1335            &fx.store,
1336            &fx.p("records/contacts/sarah.md"),
1337            &only_meetings,
1338            None,
1339        )
1340        .unwrap();
1341        assert_eq!(
1342            paths(&got),
1343            vec!["records/meetings/m1"],
1344            "--type meeting must exclude the wiki-page linker"
1345        );
1346
1347        // Unfiltered, both come back — proving the filter (not the data) dropped one.
1348        let all = backlinks(&fx.store, &fx.p("records/contacts/sarah.md")).unwrap();
1349        assert_eq!(paths(&all), vec!["records/meetings/m1", "wiki/people/bio"]);
1350    }
1351
1352    #[test]
1353    fn backlinks_filtered_layer_scopes_the_candidate_set() {
1354        // `--in <layer>` narrows backlinks to linkers under that layer.
1355        let fx = Fixture::new();
1356        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1357        fx.write(
1358            "records/meetings/m1.md",
1359            "meeting",
1360            "Call",
1361            "[[records/contacts/sarah]]",
1362        );
1363        fx.write(
1364            "wiki/people/bio.md",
1365            "wiki-page",
1366            "Bio",
1367            "[[records/contacts/sarah]]",
1368        );
1369        fx.reindex();
1370
1371        let got = backlinks_filtered(
1372            &fx.store,
1373            &fx.p("records/contacts/sarah.md"),
1374            &[],
1375            Some(Layer::Wiki),
1376        )
1377        .unwrap();
1378        assert_eq!(
1379            paths(&got),
1380            vec!["wiki/people/bio"],
1381            "--in wiki must keep only the wiki-layer linker"
1382        );
1383
1384        let records_only = backlinks_filtered(
1385            &fx.store,
1386            &fx.p("records/contacts/sarah.md"),
1387            &[],
1388            Some(Layer::Records),
1389        )
1390        .unwrap();
1391        assert_eq!(paths(&records_only), vec!["records/meetings/m1"]);
1392    }
1393
1394    // ── neighborhood ─────────────────────────────────────────────────────────
1395
1396    #[test]
1397    fn neighborhood_hops_zero_is_empty() {
1398        let fx = Fixture::new();
1399        fx.write("wiki/people/a.md", "wiki-page", "A", "[[wiki/people/b]]");
1400        fx.write("wiki/people/b.md", "wiki-page", "B", "");
1401        let slice = neighborhood(
1402            &fx.store,
1403            &fx.p("wiki/people/a.md"),
1404            0,
1405            &[],
1406            Direction::Both,
1407        )
1408        .unwrap();
1409        assert_eq!(slice.seed, fx.p("wiki/people/a"));
1410        assert!(slice.nodes.is_empty());
1411    }
1412
1413    #[test]
1414    fn neighborhood_outgoing_one_hop_reads_summary_and_type() {
1415        let fx = Fixture::new();
1416        fx.write(
1417            "wiki/people/a.md",
1418            "wiki-page",
1419            "Person A",
1420            "Knows [[records/contacts/b]].",
1421        );
1422        fx.write("records/contacts/b.md", "contact", "Contact B summary", "");
1423        let slice = neighborhood(
1424            &fx.store,
1425            &fx.p("wiki/people/a.md"),
1426            1,
1427            &[],
1428            Direction::Outgoing,
1429        )
1430        .unwrap();
1431        assert_eq!(slice.nodes.len(), 1);
1432        let n = &slice.nodes[0];
1433        assert_eq!(n.path, fx.p("records/contacts/b"));
1434        assert_eq!(n.summary, "Contact B summary");
1435        assert_eq!(n.type_.as_deref(), Some("contact"));
1436        assert_eq!(n.hops, 1);
1437        assert_eq!(n.via, Some((fx.p("wiki/people/a"), Direction::Outgoing)));
1438    }
1439
1440    #[test]
1441    fn neighborhood_incoming_only_walks_backlinks() {
1442        let fx = Fixture::new();
1443        // a -> seed (incoming to seed). seed -> c (outgoing from seed).
1444        fx.write(
1445            "wiki/people/seed.md",
1446            "wiki-page",
1447            "Seed",
1448            "Out to [[wiki/people/c]].",
1449        );
1450        fx.write(
1451            "wiki/people/a.md",
1452            "wiki-page",
1453            "A",
1454            "In to [[wiki/people/seed]].",
1455        );
1456        fx.write("wiki/people/c.md", "wiki-page", "C", "");
1457        fx.reindex();
1458        let slice = neighborhood(
1459            &fx.store,
1460            &fx.p("wiki/people/seed.md"),
1461            1,
1462            &[],
1463            Direction::Incoming,
1464        )
1465        .unwrap();
1466        // Incoming direction: only `a` (which links TO seed), not `c`.
1467        assert_eq!(
1468            paths(
1469                &slice
1470                    .nodes
1471                    .iter()
1472                    .map(|n| n.path.clone())
1473                    .collect::<Vec<_>>()
1474            ),
1475            vec!["wiki/people/a"]
1476        );
1477        assert_eq!(
1478            slice.nodes[0].via,
1479            Some((fx.p("wiki/people/seed"), Direction::Incoming))
1480        );
1481    }
1482
1483    #[test]
1484    fn neighborhood_bounded_bfs_respects_hop_limit_and_min_distance() {
1485        let fx = Fixture::new();
1486        // Chain a -> b -> c -> d, all outgoing.
1487        fx.write("wiki/c/a.md", "wiki-page", "A", "[[wiki/c/b]]");
1488        fx.write("wiki/c/b.md", "wiki-page", "B", "[[wiki/c/c]]");
1489        fx.write("wiki/c/c.md", "wiki-page", "C", "[[wiki/c/d]]");
1490        fx.write("wiki/c/d.md", "wiki-page", "D", "");
1491        let slice =
1492            neighborhood(&fx.store, &fx.p("wiki/c/a.md"), 2, &[], Direction::Outgoing).unwrap();
1493        // 2 hops reaches b (1) and c (2), not d (3).
1494        let by_path: HashMap<String, u32> = slice
1495            .nodes
1496            .iter()
1497            .map(|n| (n.path.to_string_lossy().to_string(), n.hops))
1498            .collect();
1499        assert_eq!(by_path.get("wiki/c/b").copied(), Some(1));
1500        assert_eq!(by_path.get("wiki/c/c").copied(), Some(2));
1501        assert_eq!(by_path.get("wiki/c/d"), None);
1502        assert_eq!(slice.nodes.len(), 2);
1503    }
1504
1505    #[test]
1506    fn neighborhood_records_min_hops_on_diamond() {
1507        let fx = Fixture::new();
1508        // Diamond: a -> b, a -> c, b -> d, c -> d. d is reachable at hop 2 from
1509        // either branch; it must be recorded once, at hop 2.
1510        fx.write("wiki/d/a.md", "wiki-page", "A", "[[wiki/d/b]] [[wiki/d/c]]");
1511        fx.write("wiki/d/b.md", "wiki-page", "B", "[[wiki/d/d]]");
1512        fx.write("wiki/d/c.md", "wiki-page", "C", "[[wiki/d/d]]");
1513        fx.write("wiki/d/d.md", "wiki-page", "D", "");
1514        let slice =
1515            neighborhood(&fx.store, &fx.p("wiki/d/a.md"), 3, &[], Direction::Outgoing).unwrap();
1516        let d_nodes: Vec<&ContextNode> = slice
1517            .nodes
1518            .iter()
1519            .filter(|n| n.path == fx.p("wiki/d/d"))
1520            .collect();
1521        assert_eq!(d_nodes.len(), 1, "d must appear exactly once");
1522        assert_eq!(d_nodes[0].hops, 2, "d's min distance from a is 2");
1523        // b and c at hop 1, d at hop 2 => 3 nodes total, no cycle blowup.
1524        assert_eq!(slice.nodes.len(), 3);
1525    }
1526
1527    #[test]
1528    fn neighborhood_type_filter_narrows_results_but_not_traversal() {
1529        let fx = Fixture::new();
1530        // seed -> contact -> meeting. Filtering to `meeting` must still reach
1531        // the meeting THROUGH the (excluded) contact at hop 2.
1532        fx.write(
1533            "wiki/people/seed.md",
1534            "wiki-page",
1535            "Seed",
1536            "[[records/contacts/sarah]]",
1537        );
1538        fx.write(
1539            "records/contacts/sarah.md",
1540            "contact",
1541            "Sarah",
1542            "[[records/meetings/m1]]",
1543        );
1544        fx.write("records/meetings/m1.md", "meeting", "Renewal call", "");
1545        let only_meetings = vec!["meeting".to_string()];
1546        let slice = neighborhood(
1547            &fx.store,
1548            &fx.p("wiki/people/seed.md"),
1549            2,
1550            &only_meetings,
1551            Direction::Outgoing,
1552        )
1553        .unwrap();
1554        // Only the meeting is returned; the contact is traversed but filtered out.
1555        assert_eq!(slice.nodes.len(), 1);
1556        assert_eq!(slice.nodes[0].path, fx.p("records/meetings/m1"));
1557        assert_eq!(slice.nodes[0].type_.as_deref(), Some("meeting"));
1558        assert_eq!(slice.nodes[0].hops, 2);
1559    }
1560
1561    #[test]
1562    fn neighborhood_cycle_terminates() {
1563        let fx = Fixture::new();
1564        // a <-> b cycle. Must not loop forever; each appears once.
1565        fx.write("wiki/g/a.md", "wiki-page", "A", "[[wiki/g/b]]");
1566        fx.write("wiki/g/b.md", "wiki-page", "B", "[[wiki/g/a]]");
1567        fx.reindex();
1568        let slice =
1569            neighborhood(&fx.store, &fx.p("wiki/g/a.md"), 10, &[], Direction::Both).unwrap();
1570        // From a: b is the only other node (a is the seed, excluded).
1571        assert_eq!(
1572            paths(
1573                &slice
1574                    .nodes
1575                    .iter()
1576                    .map(|n| n.path.clone())
1577                    .collect::<Vec<_>>()
1578            ),
1579            vec!["wiki/g/b"]
1580        );
1581    }
1582
1583    // ── orphans ──────────────────────────────────────────────────────────────
1584
1585    #[test]
1586    fn orphans_finds_files_with_no_edges_either_direction() {
1587        let fx = Fixture::new();
1588        // Wired pair: a links to b (a has outgoing, b has incoming).
1589        fx.write("wiki/people/a.md", "wiki-page", "A", "[[wiki/people/b]]");
1590        fx.write("wiki/people/b.md", "wiki-page", "B", "");
1591        // Orphan: no links in or out.
1592        fx.write(
1593            "sources/emails/lonely.md",
1594            "email",
1595            "Lonely email",
1596            "Just text, no links.",
1597        );
1598        let got = orphans(&fx.store, None).unwrap();
1599        assert_eq!(paths(&got), vec!["sources/emails/lonely.md"]);
1600    }
1601
1602    #[test]
1603    fn orphans_file_with_only_broken_outgoing_link_is_orphan() {
1604        let fx = Fixture::new();
1605        // Broken targets are validation issues, not graph edges to another
1606        // store file. A file whose only link points nowhere is still an orphan.
1607        fx.write(
1608            "wiki/people/a.md",
1609            "wiki-page",
1610            "A",
1611            "[[records/contacts/ghost]]",
1612        );
1613        let got = orphans(&fx.store, None).unwrap();
1614        assert!(
1615            paths(&got).contains(&"wiki/people/a.md".to_string()),
1616            "broken outgoing links must not wire the graph: {got:?}"
1617        );
1618    }
1619
1620    #[test]
1621    fn orphans_file_with_only_incoming_is_not_orphan() {
1622        let fx = Fixture::new();
1623        // `target` has no outgoing links but IS linked to by `linker` — not an orphan.
1624        fx.write("records/contacts/target.md", "contact", "Target", "");
1625        fx.write(
1626            "wiki/people/linker.md",
1627            "wiki-page",
1628            "Linker",
1629            "[[records/contacts/target]]",
1630        );
1631        let got = orphans(&fx.store, None).unwrap();
1632        assert!(
1633            !paths(&got).contains(&"records/contacts/target.md".to_string()),
1634            "incoming-only is not an orphan: {got:?}"
1635        );
1636        // `linker` has outgoing, so also not an orphan.
1637        assert!(!paths(&got).contains(&"wiki/people/linker.md".to_string()));
1638    }
1639
1640    #[test]
1641    fn orphans_incoming_link_from_other_layer_unorphans() {
1642        let fx = Fixture::new();
1643        // Candidate in records/, only incoming edge comes from wiki/ — a
1644        // cross-layer link must still un-orphan it even when scoped to records.
1645        fx.write("records/contacts/sarah.md", "contact", "Sarah", "");
1646        fx.write(
1647            "wiki/people/sarah.md",
1648            "wiki-page",
1649            "bio",
1650            "[[records/contacts/sarah]]",
1651        );
1652        // A genuine orphan in records/ to prove the scope still returns something.
1653        fx.write("records/contacts/nemo.md", "contact", "Nemo", "");
1654        let got = orphans(&fx.store, Some(Layer::Records)).unwrap();
1655        assert_eq!(paths(&got), vec!["records/contacts/nemo.md"]);
1656    }
1657
1658    #[test]
1659    fn orphans_layer_scope_filters_candidates() {
1660        let fx = Fixture::new();
1661        // One orphan per layer.
1662        fx.write("sources/emails/s.md", "email", "S", "no links");
1663        fx.write("records/contacts/r.md", "contact", "R", "");
1664        fx.write("wiki/people/w.md", "wiki-page", "W", "");
1665        let only_wiki = orphans(&fx.store, Some(Layer::Wiki)).unwrap();
1666        assert_eq!(paths(&only_wiki), vec!["wiki/people/w.md"]);
1667        let only_sources = orphans(&fx.store, Some(Layer::Sources)).unwrap();
1668        assert_eq!(paths(&only_sources), vec!["sources/emails/s.md"]);
1669        // No scope: all three, sorted (records, sources, wiki).
1670        let all = orphans(&fx.store, None).unwrap();
1671        assert_eq!(
1672            paths(&all),
1673            vec![
1674                "records/contacts/r.md",
1675                "sources/emails/s.md",
1676                "wiki/people/w.md",
1677            ]
1678        );
1679    }
1680
1681    #[test]
1682    fn orphans_self_link_does_not_count_as_an_edge() {
1683        let fx = Fixture::new();
1684        // A page that only links to itself has no real edges => still an orphan.
1685        fx.write(
1686            "wiki/synthesis/solo.md",
1687            "wiki-page",
1688            "Solo",
1689            "I reference [[wiki/synthesis/solo]] only.",
1690        );
1691        let got = orphans(&fx.store, None).unwrap();
1692        assert_eq!(paths(&got), vec!["wiki/synthesis/solo.md"]);
1693    }
1694
1695    #[test]
1696    fn orphans_excludes_index_and_db_files() {
1697        let fx = Fixture::new();
1698        // A lone index.md / DB.md must never be reported as an orphan content file.
1699        fx.write_raw(
1700            "wiki/index.md",
1701            "---\ntype: index\nscope: layer\nfolder: wiki\n---\n# wiki\n",
1702        );
1703        fx.write(
1704            "wiki/people/real-orphan.md",
1705            "wiki-page",
1706            "Real",
1707            "no links",
1708        );
1709        let got = orphans(&fx.store, None).unwrap();
1710        assert_eq!(paths(&got), vec!["wiki/people/real-orphan.md"]);
1711    }
1712
1713    // ── frontmatter_block helper ─────────────────────────────────────────────
1714
1715    #[test]
1716    fn frontmatter_block_extracts_between_fences() {
1717        let text = "---\ntype: contact\nsummary: hi\n---\nbody here\n";
1718        assert_eq!(
1719            frontmatter_block(text),
1720            Some("type: contact\nsummary: hi\n")
1721        );
1722    }
1723
1724    #[test]
1725    fn frontmatter_block_none_without_leading_fence() {
1726        let text = "no frontmatter here\n";
1727        assert_eq!(frontmatter_block(text), None);
1728    }
1729}