Skip to main content

dbmd_core/
store.rs

1//! `store` — walk, locate, and shard a db.md store.
2//!
3//! A db.md store is one directory marked by an uppercase `DB.md` at its root.
//! [`Store::open`] (lenient) and [`Store::open_strict`] (strict) are the gates
//! every store-walking subcommand goes through; a missing `DB.md` is the
//! [`NotAStore`] error (`NOT_A_STORE`). The toolkit never guesses a store root.
7//!
8//! Scale discipline lives here: [`Store::walk`] and the layer/type-folder
9//! walks are **SWEEP** primitives used only by `validate --all`,
10//! `index rebuild`, and `stats`. The interactive loop instead uses
11//! [`Store::find_links_to`] / [`Store::find_links_to_any`] (a single
12//! presence-only content scan) and the `index.jsonl` sidecar readers
13//! ([`Store::find_by_type`] / [`Store::find_by_where`] /
14//! [`Store::read_type_index`]) — never a whole-store parse. The batch
15//! [`Store::find_links_to_any`] is what keeps the working-set validate's
16//! incoming-linker discovery a single store scan rather than one scan per
17//! changed object.
18//!
19//! Link edges are defined once, here, by the shared [`extract_edge_targets`] /
20//! [`canonical_link_target`] / [`link_edge_key`] helpers (fence-aware,
21//! whitespace-trimmed, case-folded to the filesystem), so the forward view
22//! (`graph::forwardlinks`), the backward view ([`Store::find_links_to_any`]),
23//! `rename`, and `validate` all agree on exactly which `[[...]]` is an edge.
24//! [`ensure_path_within_store`] is the within-store containment gate every
25//! caller-influenced path passes through before it is read or traversed.
26
27use std::collections::BTreeMap;
28use std::path::{Path, PathBuf};
29use std::time::{SystemTime, UNIX_EPOCH};
30
31use chrono::{DateTime, Datelike, FixedOffset};
32use ignore::WalkBuilder;
33
34use crate::index::IndexRecord;
35use crate::parser::{parse_db_md, Config, Frontmatter};
36
/// Basenames that are never content files. The list currently holds a single
/// entry, the curator-maintained `index.md` catalog, so a SWEEP over the
/// content layers never mistakes a catalog for a record.
///
/// Only `index.md` is excluded by basename, because the content walks traverse
/// the layer dirs (`sources/`/`records/`/`wiki/`) and `index.md` is the only
/// meta file that appears INSIDE them. The root `DB.md` / `log.md` (and the
/// `log/` archive) live at the store root, outside every layer, so they are
/// never reached by these walks — and a content file that merely happens to be
/// named `DB.md` or `log.md` inside a layer (e.g. `records/docs/DB.md`) is real
/// content the SPEC does NOT reserve at type-folder depth.
///
/// NOTE(review): the consumer of this list is not visible in this part of the
/// file; presumably it is `is_non_content_basename` — confirm.
const NON_CONTENT_BASENAMES: [&str; 1] = ["index.md"];
49
/// File name of the complete machine-twin sidecar (`index.jsonl`) that backs
/// every structured read. One such file is expected per type-folder; see
/// [`Store::read_type_index`] for how its lines are interpreted.
const TYPE_INDEX_FILE: &str = "index.jsonl";
52
/// Returned when a path is opened as a store but has no `DB.md` at its root.
/// Surfaced as the structured code `NOT_A_STORE` with a non-zero exit.
///
/// Produced by both [`Store::open`] and [`Store::open_strict`] when
/// [`Store::is_db_md_store`] rejects the path.
#[derive(Debug, thiserror::Error)]
#[error("not a db.md store: {path} has no DB.md")]
pub struct NotAStore {
    /// The path that was inspected (the candidate store root, not the
    /// `DB.md` file itself).
    pub path: PathBuf,
}
61
/// Errors from store-level operations (walk, locate, shard, sidecar read).
///
/// Each variant carries the path it concerns so the message is actionable
/// without extra context from the caller.
#[derive(Debug, thiserror::Error)]
pub enum StoreError {
    /// A sidecar `index.jsonl` could not be read or parsed. Raised by
    /// [`Store::read_type_index`] for both the file read failure and the
    /// first line that is not a valid record.
    #[error("failed to read type index {path}: {message}")]
    BadTypeIndex {
        /// The sidecar file.
        path: PathBuf,
        /// What went wrong (for a parse failure this includes the 1-based
        /// line number).
        message: String,
    },

    /// A required date field for sharding was absent or unparseable, and there
    /// was no usable fallback. Raised by [`Store::shard_path_in`] for a
    /// sharding type.
    #[error("cannot compute shard path for {file}: no usable date field")]
    NoShardDate {
        /// The file being placed (the un-sharded `<folder>/<name>.md` path).
        file: PathBuf,
    },

    /// A store scan failed to start or run: either the directory walker
    /// yielded an error or a file could not be read during a content scan.
    #[error("search failed under {root}: {message}")]
    Search {
        /// The root the scan ran under.
        root: PathBuf,
        /// What went wrong.
        message: String,
    },

    /// An underlying I/O failure, converted automatically via `?`.
    #[error(transparent)]
    Io(#[from] std::io::Error),
}
95
/// The canonical layers of a db.md store modelled by this enum: `sources/`
/// and `records/`.
///
/// `Ord`/`PartialOrd` are derived (additively) because sibling modules key
/// `BTreeMap`s on `Layer` (e.g. `stats::Stats::files_per_layer`); the canonical
/// declaration order (`Sources` < `Records`) is the sort order, so the variants
/// must not be reordered.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Layer {
    /// `sources/` — raw evidence (documentary + testimonial); immutable; date-sharded at scale.
    Sources,
    /// `records/` — everything the agent authors; meta-typed fact/operational/conclusion; entity types flat, event types sharded.
    Records,
}
108
109impl Layer {
110    /// The on-disk folder name for this layer (`"sources"` / `"records"`).
111    pub fn dir_name(self) -> &'static str {
112        match self {
113            Layer::Sources => "sources",
114            Layer::Records => "records",
115        }
116    }
117
118    /// Parse a layer from its folder name; `None` for anything else.
119    pub fn from_dir_name(name: &str) -> Option<Self> {
120        match name {
121            "sources" => Some(Layer::Sources),
122            "records" => Some(Layer::Records),
123            _ => None,
124        }
125    }
126
127    /// Every layer, in canonical order.
128    pub fn all() -> [Layer; 2] {
129        [Layer::Sources, Layer::Records]
130    }
131}
132
/// An opened db.md store: its root path plus the parsed `DB.md` [`Config`].
///
/// Construct via [`Store::open`] (lenient: a damaged `DB.md` falls back to the
/// default config) or [`Store::open_strict`] (a damaged `DB.md` is an error).
/// Both validate the `DB.md` marker first, so downstream code can assume a
/// real store.
#[derive(Debug, Clone)]
pub struct Store {
    /// The store root (the directory containing `DB.md`), stored exactly as
    /// the caller passed it to the constructor.
    pub root: PathBuf,
    /// The parsed `DB.md` config (agent instructions, policies, schemas).
    pub config: Config,
}
144
145impl Store {
146    /// True if `path` is a db.md store root: an uppercase `DB.md` file exists
147    /// at `path`. On case-sensitive filesystems a lowercase `db.md` must NOT
148    /// count (the lowercase name refers to the project/spec, not the marker).
149    pub fn is_db_md_store(path: &Path) -> bool {
150        // Read the directory and match the *stored* filename byte-for-byte.
151        // `path.join("DB.md").exists()` would lie on a case-insensitive
152        // filesystem (macOS default), where a lowercase `db.md` answers a
153        // `DB.md` probe. `read_dir` returns the real on-disk name, so the
154        // exact-match check is correct on both case-sensitive (Linux) and
155        // case-insensitive filesystems.
156        let entries = match std::fs::read_dir(path) {
157            Ok(entries) => entries,
158            Err(_) => return false,
159        };
160        for entry in entries.flatten() {
161            if entry.file_name() == "DB.md" {
162                // A directory literally named `DB.md` is not the marker.
163                match entry.file_type() {
164                    Ok(ft) if ft.is_dir() => return false,
165                    Ok(_) => return true,
166                    Err(_) => return false,
167                }
168            }
169        }
170        false
171    }
172
173    /// Open `path` as a db.md store and require `DB.md` to be readable and
174    /// parseable. Normal commands should enter through this strict gate so a
175    /// damaged config cannot silently disable schema or policy rules.
176    pub fn open_strict(path: &Path) -> crate::Result<Store> {
177        if !Store::is_db_md_store(path) {
178            return Err(NotAStore {
179                path: path.to_path_buf(),
180            }
181            .into());
182        }
183        let db_md = path.join("DB.md");
184        let text = std::fs::read_to_string(&db_md)?;
185        let config = parse_db_md(&text, &db_md)?;
186        Ok(Store {
187            root: path.to_path_buf(),
188            config,
189        })
190    }
191
192    /// Open `path` as a db.md store: confirm the `DB.md` marker (else
193    /// [`NotAStore`]) and parse the `DB.md` config when possible. This is the
194    /// lenient validation-oriented open path: a damaged `DB.md` still marks the
195    /// directory as a store so `dbmd validate` can report the config error as an
196    /// issue. Normal CLI commands should use [`Store::open_strict`] instead.
197    pub fn open(path: &Path) -> Result<Store, NotAStore> {
198        if !Store::is_db_md_store(path) {
199            return Err(NotAStore {
200                path: path.to_path_buf(),
201            });
202        }
203        let db_md = path.join("DB.md");
204        // The marker exists; parse its config. A read or parse failure leaves
205        // the store openable with default config rather than masquerading as
206        // NOT_A_STORE — the marker is present, so this *is* a store; a damaged
207        // DB.md is `dbmd validate`'s job to report, not `open`'s.
208        let config = match std::fs::read_to_string(&db_md) {
209            Ok(text) => parse_db_md(&text, &db_md).unwrap_or_default(),
210            Err(_) => Config::default(),
211        };
212        Ok(Store {
213            root: path.to_path_buf(),
214            config,
215        })
216    }
217
218    /// **SWEEP.** Recursively iterate every `.md` content file across
219    /// `sources/`, `records/`, and `wiki/`, skipping hidden dirs and `log/`.
220    /// Used only by `validate --all`, `index rebuild`, and `stats` — never on
221    /// the interactive loop.
222    pub fn walk(&self) -> Result<Vec<PathBuf>, StoreError> {
223        // Only the three content layers — never root meta files (`DB.md`,
224        // `index.md`, `log.md`) and never `log/`, which live at root and are
225        // outside every layer dir.
226        let mut out = Vec::new();
227        for layer in Layer::all() {
228            out.extend(self.walk_layer(layer)?);
229        }
230        out.sort();
231        Ok(out)
232    }
233
234    /// **SWEEP.** Like [`Store::walk`] but scoped to a single layer.
235    pub fn walk_layer(&self, layer: Layer) -> Result<Vec<PathBuf>, StoreError> {
236        let layer_root = self.root.join(layer.dir_name());
237        if !layer_root.is_dir() {
238            return Ok(Vec::new());
239        }
240        self.walk_content_md(&layer_root)
241    }
242
243    /// Enumerate every `.md` file in a single type-folder, **recursing through
244    /// its date-shards** (`sources/emails/**/*.md`). The unit the index builder
245    /// and per-folder rebuild operate on. SWEEP-class (scoped to one folder).
246    pub fn walk_type_folder(&self, type_folder: &Path) -> Result<Vec<PathBuf>, StoreError> {
247        let abs = self.resolve_under_root(type_folder);
248        if !abs.is_dir() {
249            return Ok(Vec::new());
250        }
251        self.walk_content_md(&abs)
252    }
253
254    /// The ≤`n` most-recent files in a type-folder by frontmatter `updated`
255    /// (descending), ties broken by store-relative path (ascending) — a total
256    /// order, so write-through and rebuild never disagree on #500 vs #501.
257    ///
258    /// Reads `updated` across the folder's shards — a SWEEP cost absorbed into
259    /// `index rebuild`. The write-through path never calls this. The
260    /// cap-selection primitive for the 500-entry `index.md` browse view.
261    pub fn recent_in_type_folder(
262        &self,
263        type_folder: &Path,
264        n: usize,
265    ) -> Result<Vec<PathBuf>, StoreError> {
266        let files = self.walk_type_folder(type_folder)?;
267        // (updated, rel-path) for each file. Files missing/unparseable
268        // `updated` sort *after* dated ones (None last), then by path — so they
269        // are deterministically the lowest-priority candidates for the cap, not
270        // dropped silently. The total order (updated desc, path asc) is what
271        // keeps write-through and rebuild agreeing on #500 vs #501.
272        let mut keyed: Vec<(Option<DateTime<FixedOffset>>, PathBuf)> = files
273            .into_iter()
274            .map(|rel| {
275                let updated = self.read_updated(&self.abs_path(&rel));
276                (updated, rel)
277            })
278            .collect();
279        keyed.sort_by(|a, b| {
280            // `updated` descending: newest first. `None` is treated as the
281            // oldest possible, so dated files always win a cap slot over
282            // undated ones.
283            let by_updated = b.0.cmp(&a.0);
284            by_updated.then_with(|| a.1.cmp(&b.1))
285        });
286        keyed.truncate(n);
287        Ok(keyed.into_iter().map(|(_, rel)| rel).collect())
288    }
289
290    /// The shard/flat predicate: true if the type date-shards, false if it
291    /// stays flat. True for source types and event record types
292    /// (`expense`/`invoice`/`meeting` + custom `order`/`ticket`/`transaction`),
293    /// or when `DB.md ## Schemas` declares `shard: by-date`. False for
294    /// dedup-bounded entity types (`contact`/`company`/`decision`) and `wiki/`.
295    pub fn type_shards(&self, type_: &str) -> bool {
296        // A `DB.md ## Schemas` `### <type>` block with a `shard:` directive is
297        // authoritative — it is the v0.2 generic-model way to declare sharding,
298        // so it overrides the built-in default below (in either direction).
299        if let Some(shard) = self.config.schemas.get(type_).and_then(|s| s.shard) {
300            return shard;
301        }
302        // Built-in default for the example types. Sharding is a property of the
303        // *type*:
304        //  - source types carry a primary date field and shard;
305        //  - event record types track business volume and shard;
306        //  - dedup-bounded entity types and curation-bounded wiki stay flat.
307        // Any type can override this via a `shard:` directive (above).
308        matches!(
309            type_,
310            // source types (documentary + testimonial)
311            "email" | "transcript" | "pdf-source" | "note"
312            // event record types (canonical)
313            | "expense" | "invoice" | "meeting"
314            // event record types (recognized custom, per the plan)
315            | "order" | "ticket" | "transaction"
316        )
317    }
318
319    /// Compute the canonical write path for a new file. For a sharding type
320    /// (per [`Store::type_shards`]) insert `<YYYY>/<MM>/` from the type's
321    /// primary date field (`email.date`, `expense.date`, … fallback `created`)
322    /// under the type folder; flat types and `wiki/` get no shard segment.
323    /// Deterministic + stable: same input → same path, so a record never moves
324    /// once written.
325    pub fn shard_path_for(
326        &self,
327        type_: &str,
328        frontmatter: &Frontmatter,
329        name: &str,
330    ) -> Result<PathBuf, StoreError> {
331        self.shard_path_in(&default_type_folder(type_), type_, frontmatter, name)
332    }
333
334    /// Like [`Store::shard_path_for`], but compute the path under an explicit,
335    /// caller-resolved type-folder rather than the canonical default. This lets a
336    /// write surface honour an agent-supplied conforming sub-folder — e.g.
337    /// `wiki/projects/`, `wiki/people/`, `wiki/synthesis/` (the SPEC files a
338    /// `wiki-page` under `wiki/<topic>/`, i.e. ANY topic sub-folder, not only the
339    /// `wiki/topics` default) — while still applying date-sharding for sharding
340    /// types. The folder must be a conforming `<layer>/<type-folder>` (2
341    /// components, recognized layer); the caller is responsible for that (see the
342    /// CLI's `resolve_write_path`), so it is taken as given here.
343    ///
344    /// Sharding is still a property of the *type*: a sharding type gets the
345    /// `<YYYY>/<MM>` segment under `folder`; a flat type lands directly in it.
346    pub fn shard_path_in(
347        &self,
348        folder: &Path,
349        type_: &str,
350        frontmatter: &Frontmatter,
351        name: &str,
352    ) -> Result<PathBuf, StoreError> {
353        let folder = folder.to_path_buf();
354        let filename = ensure_md_extension(name);
355
356        if !self.type_shards(type_) {
357            // Flat type (entity records, wiki, decisions): no shard segment.
358            return Ok(folder.join(filename));
359        }
360
361        // Sharding type: derive <YYYY>/<MM> from the primary date field, with
362        // `created` as the universal fallback. Reading the public `Frontmatter`
363        // fields directly (typed `created`/`updated` + raw `extra`) avoids the
364        // not-yet-implemented `Frontmatter::get`/`parse` and keeps this pure.
365        let (year, month) = self
366            .primary_shard_segment(type_, frontmatter)
367            .ok_or_else(|| StoreError::NoShardDate {
368                file: folder.join(&filename),
369            })?;
370
371        Ok(folder.join(year).join(month).join(filename))
372    }
373
374    /// Find files with an incoming wiki-link to `target` via a **single
375    /// presence-only content scan** for an edge to `target` across all layers,
376    /// using the shared fence-aware/whitespace-trimmed/case-folded edge notion
377    /// ([`extract_edge_targets`]). Loop-fast; no whole-graph build. Returns
378    /// store-relative paths.
379    pub fn find_links_to(&self, target: &Path) -> Result<Vec<PathBuf>, StoreError> {
380        // A single target is just the degenerate batch case — one key, one store
381        // scan. Routing through `find_links_to_any` keeps the
382        // pattern construction and the scan loop in exactly one place. The
383        // batch API takes `&[PathBuf]`, so the one-element slice is owned (a
384        // single alloc on this single-target convenience path; the batch path
385        // validate.rs rides is untouched).
386        self.find_links_to_any(&[target.to_path_buf()])
387    }
388
389    /// Find every file with an incoming wiki-link to **any** of `targets`, in a
390    /// **single content pass** over the store (one `.md` walk, one presence-only
391    /// edge scan per file). This is the batch incoming-linker finder the
392    /// working-set [`crate::validate::validate_working_set`] sits on: it must find
393    /// the linkers for the *whole* changed set without paying a full store read
394    /// per changed object. Cost is therefore one store scan (O(store)), NOT
395    /// `targets.len() × store` — calling [`find_links_to`](Self::find_links_to)
396    /// in a loop would reread every `.md` once per target and is the exact
397    /// `O(changed × store)` blow-up this method exists to prevent. Returns
398    /// store-relative paths (deduped, sorted).
399    ///
400    /// **One edge notion with `forwardlinks`/`rename`/`validate`.** A file links
401    /// to a target iff [`extract_edge_targets`] (fence-aware, whitespace-trimmed)
402    /// of its content yields a target whose [`link_edge_key`] equals the target's
403    /// — the *same* definition the forward view and the rename rewriter use. The
404    /// previous implementation used a literal-adjacency ripgrep regex that (a)
405    /// matched `[[...]]` text inside fenced code examples (which validate treats
406    /// as non-edges), (b) missed inner-whitespace padding (`[[ x ]]`), and (c)
407    /// compared case-sensitively even where the filesystem resolves links
408    /// case-insensitively — so backlinks/links/rename silently disagreed with
409    /// forwardlinks and validate. Reading content and routing through the shared
410    /// extractor removes all three divergences.
411    ///
412    /// Why content scan and not the sidecar `links` field: the sidecar projects
413    /// only the frontmatter `links:` array, so it misses edges written in the
414    /// body or in typed fields (`company: [[…]]`). Finding an incoming link to an
415    /// arbitrary path therefore requires reading file content.
416    pub fn find_links_to_any(&self, targets: &[PathBuf]) -> Result<Vec<PathBuf>, StoreError> {
417        // Build the set of comparison keys for the requested targets, in the
418        // canonical (case-folded where the filesystem is case-insensitive) form
419        // the edge extractor emits. An empty key (a target that renders to no
420        // link text, e.g. `""` or `"./"`) contributes nothing — and crucially the
421        // empty set short-circuits below so we never report every file.
422        let want: std::collections::HashSet<String> = targets
423            .iter()
424            .filter_map(|t| {
425                let canonical = canonical_link_target(&t.to_string_lossy());
426                if canonical.is_empty() {
427                    None
428                } else {
429                    Some(link_edge_key(&canonical))
430                }
431            })
432            .collect();
433        if want.is_empty() {
434            return Ok(Vec::new());
435        }
436
437        let mut hits = std::collections::BTreeSet::new();
438        // Scan every `.md` file in the store (skip hidden + `log/`), including
439        // `index.md` catalogs — an incoming reference is wherever the link text
440        // lives; the caller decides relevance. ONE walk for the whole target set;
441        // per file we stop at the first matching edge (presence is all we need),
442        // so a file that links to several targets is read once, not once per
443        // target.
444        for rel in self.walk_all_md()? {
445            let abs = self.abs_path(&rel);
446            // Read lossily: a `.md` verbatim-ingested into `sources/` can carry a
447            // stray non-UTF-8 byte (a mis-decoded Latin-1 import). Decoding
448            // lossily substitutes replacement characters instead of erroring, so
449            // one bad byte on a link-bearing line no longer aborts the whole
450            // store scan (the historical `UTF8`-sink failure). The link syntax is
451            // ASCII, so a replacement char elsewhere on the line never hides a
452            // `[[...]]`. A read error (not a decode error) is genuine I/O trouble
453            // and propagates.
454            let bytes = match std::fs::read(&abs) {
455                Ok(b) => b,
456                Err(e) => {
457                    return Err(StoreError::Search {
458                        root: self.root.clone(),
459                        message: format!("read failed in {}: {e}", abs.display()),
460                    })
461                }
462            };
463            let text = String::from_utf8_lossy(&bytes);
464            for target in extract_edge_targets(&text) {
465                if want.contains(&link_edge_key(&target)) {
466                    hits.insert(rel);
467                    break;
468                }
469            }
470        }
471        Ok(hits.into_iter().collect())
472    }
473
474    /// Candidate set for a `type` query: read every type-folder `index.jsonl`
475    /// sidecar in the type's single layer and return the records of that
476    /// `type`. Complete and cold-cache-proof — NOT a walk-and-parse or a
477    /// frontmatter ripgrep scan, and **never a store-wide read**.
478    ///
479    /// The read is bounded to the type's one layer subtree
480    /// (O(entities-in-layer)): a type lives in exactly one layer, and
481    /// `default_type_folder` always encodes it (recognized → its SPEC layer;
482    /// unrecognized → `records/`), so the walk never fans out across every
483    /// sidecar in the store and stays inside the interactive loop's
484    /// O(entities) contract.
485    ///
486    /// The whole-layer read — rather than reading only the type's canonical
487    /// folder sidecar when it happens to exist — is what makes the result
488    /// *complete*. A single `type` can legitimately be filed across several
489    /// folders within its layer: `wiki-page` under `wiki/<topic>/` for any
490    /// topic (SPEC), or a `contact` filed in `records/clients/` alongside the
491    /// canonical `records/contacts/`. The previous code read only the
492    /// canonical-guess sidecar whenever it was a file, which silently dropped
493    /// those non-canonical records the moment the canonical sidecar existed —
494    /// returning an incomplete set, and a *different* set as the store grew
495    /// (the omission flipped on once one canonical record was added). That
496    /// broke the dedup/enumeration premise this primitive backs and disagreed
497    /// with `find_by_where_in`, which already walks the whole layer. Filtering
498    /// the layer read by `type` keeps the result complete regardless of how the
499    /// type's records are foldered.
500    pub fn find_by_type(&self, type_: &str) -> Result<Vec<IndexRecord>, StoreError> {
501        let canonical_folder = default_type_folder(type_);
502        let records = self.read_all_type_indexes_in(layer_of_folder(&canonical_folder))?;
503        Ok(records.into_iter().filter(|r| r.type_ == type_).collect())
504    }
505
506    /// Candidate set for a `key=value` frontmatter query, **store-wide**: read
507    /// every type-folder `index.jsonl` sidecar and filter their records. The
508    /// unscoped pre-write dedup primitive; prefer [`Store::find_by_where_in`]
509    /// with a layer scope to stay O(entities-in-layer) on the interactive loop.
510    pub fn find_by_where(&self, key: &str, value: &str) -> Result<Vec<IndexRecord>, StoreError> {
511        self.find_by_where_in(key, value, None)
512    }
513
514    /// Candidate set for a `key=value` frontmatter query, **scoped to one
515    /// layer** when `layer` is `Some`: the sidecar walk is confined to that
516    /// layer's subtree (`<root>/<layer>/`), so the I/O is O(entities-in-layer),
517    /// not O(store records). `None` keeps the store-wide read.
518    ///
519    /// This is what makes `--in <layer>` an I/O scope, not just a result
520    /// filter: a `--where`-only query (no `--type`) used to read every sidecar
521    /// in the store and narrow by layer in memory, breaking the O(entities)
522    /// contract the interactive loop depends on. With a layer in hand we walk
523    /// only that layer's sidecars.
524    pub fn find_by_where_in(
525        &self,
526        key: &str,
527        value: &str,
528        layer: Option<Layer>,
529    ) -> Result<Vec<IndexRecord>, StoreError> {
530        // A `key=value` query can target any frontmatter field across any type,
531        // so within the chosen subtree we still read every type-folder sidecar
532        // and filter. The layer (when given) bounds *which* subtree, turning a
533        // whole-store walk into a single-layer walk.
534        let records = self.read_all_type_indexes_in(layer)?;
535        Ok(records
536            .into_iter()
537            .filter(|r| record_matches_field(r, key, value))
538            .collect())
539    }
540
541    /// Every record across the type-folder `index.jsonl` sidecars, scoped to one
542    /// layer when `layer` is `Some` (the walk is confined to `<root>/<layer>/`)
543    /// else store-wide. Sequential, complete sidecar reads — never a
544    /// walk-and-parse of the content tree.
545    ///
546    /// This is the unfiltered sidecar-enumeration primitive the relationship
547    /// loop sits on: [`crate::graph::backlinks_filtered`] uses it to bound its
548    /// candidate set to the relevant layer (or the whole store) without opening
549    /// the content tree, then confirms each candidate's edge by parsing the file.
550    pub fn sidecar_records(&self, layer: Option<Layer>) -> Result<Vec<IndexRecord>, StoreError> {
551        self.read_all_type_indexes_in(layer)
552    }
553
554    /// Parse a type-folder's `index.jsonl` into [`IndexRecord`]s, applying
555    /// last-write-wins by `path` over any un-compacted lines. The sidecar-read
556    /// primitive every structured query sits on.
557    pub fn read_type_index(&self, index_jsonl: &Path) -> Result<Vec<IndexRecord>, StoreError> {
558        let text = std::fs::read_to_string(index_jsonl).map_err(|e| StoreError::BadTypeIndex {
559            path: index_jsonl.to_path_buf(),
560            message: e.to_string(),
561        })?;
562
563        // Last-write-wins by `path` over un-compacted lines: a later line for
564        // the same path supersedes an earlier one (the jsonl is append-mostly
565        // and only compacted on rebuild). Blank lines are skipped; a non-blank
566        // line that is not a valid IndexRecord is a hard parse error.
567        let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
568        for (i, line) in text.lines().enumerate() {
569            let trimmed = line.trim();
570            if trimmed.is_empty() {
571                continue;
572            }
573            let record: IndexRecord =
574                serde_json::from_str(trimmed).map_err(|e| StoreError::BadTypeIndex {
575                    path: index_jsonl.to_path_buf(),
576                    message: format!("line {}: {e}", i + 1),
577                })?;
578            by_path.insert(record.path.clone(), record);
579        }
580        // BTreeMap keyed by path → records emerge sorted by path ascending,
581        // a deterministic order independent of line order in the file.
582        Ok(by_path.into_values().collect())
583    }
584
585    /// Resolve a store-relative path to its absolute on-disk path under
586    /// [`root`](Store::root).
587    pub fn abs_path(&self, store_relative: &Path) -> PathBuf {
588        // `Path::join` returns `store_relative` unchanged if it is already
589        // absolute, so passing an absolute path through is a no-op.
590        self.root.join(store_relative)
591    }
592
593    /// Convert an absolute path under the store into its store-relative form.
594    pub fn rel_path(&self, abs: &Path) -> Option<PathBuf> {
595        abs.strip_prefix(&self.root).ok().map(|p| p.to_path_buf())
596    }
597
598    // ── Private helpers ─────────────────────────────────────────────────────
599
600    /// Resolve a caller-supplied folder path (store-relative or absolute) to an
601    /// absolute path under the store root.
602    fn resolve_under_root(&self, folder: &Path) -> PathBuf {
603        if folder.is_absolute() {
604            folder.to_path_buf()
605        } else {
606            self.root.join(folder)
607        }
608    }
609
610    /// Walk a subtree for content `.md` files (skip hidden dirs, skip `index.md`
611    /// / `DB.md` / `log.md`), returning store-relative paths. Used by the layer
612    /// and type-folder walks.
613    fn walk_content_md(&self, root: &Path) -> Result<Vec<PathBuf>, StoreError> {
614        let mut out = Vec::new();
615        for entry in self.md_walker(root).build() {
616            let entry = entry.map_err(|e| StoreError::Search {
617                root: root.to_path_buf(),
618                message: e.to_string(),
619            })?;
620            if !is_file_entry(&entry) {
621                continue;
622            }
623            let path = entry.path();
624            if !has_md_extension(path) {
625                continue;
626            }
627            if is_non_content_basename(path) {
628                continue;
629            }
630            if let Some(rel) = self.rel_path(path) {
631                out.push(rel);
632            }
633        }
634        out.sort();
635        Ok(out)
636    }
637
638    /// Walk the whole store for **every** `.md` file (including `index.md`),
639    /// skipping hidden dirs and the `log/` archive tree. Used by the backlink
640    /// scan, where the literal link text can live in any markdown file.
641    fn walk_all_md(&self) -> Result<Vec<PathBuf>, StoreError> {
642        let mut out = Vec::new();
643        for entry in self.md_walker(&self.root).build() {
644            let entry = entry.map_err(|e| StoreError::Search {
645                root: self.root.clone(),
646                message: e.to_string(),
647            })?;
648            if !is_file_entry(&entry) {
649                continue;
650            }
651            let path = entry.path();
652            if !has_md_extension(path) {
653                continue;
654            }
655            if self.is_in_log_dir(path) {
656                continue;
657            }
658            if let Some(rel) = self.rel_path(path) {
659                out.push(rel);
660            }
661        }
662        out.sort();
663        Ok(out)
664    }
665
666    /// Read and merge every type-folder `index.jsonl` sidecar under `layer`
667    /// when given, else the whole store (skip hidden + `log/`). Each sidecar is
668    /// read with last-write-wins by path; across sidecars, paths are disjoint by
669    /// construction (one sidecar per folder), so a plain concatenation preserves
670    /// completeness. A layer scope confines the walk to `<root>/<layer>/`, which
671    /// is what keeps `find_by_where_in` O(entities-in-layer).
672    fn read_all_type_indexes_in(
673        &self,
674        layer: Option<Layer>,
675    ) -> Result<Vec<IndexRecord>, StoreError> {
676        let mut out = Vec::new();
677        for sidecar in self.find_type_index_files_in(layer)? {
678            out.extend(self.read_type_index(&self.abs_path(&sidecar))?);
679        }
680        Ok(out)
681    }
682
683    /// Locate every `index.jsonl` sidecar under `layer` (when given) else the
684    /// whole store (skip hidden + `log/`), returning store-relative paths. A
685    /// scoped read walks `<root>/<layer>/`; the store-wide read enumerates the
686    /// three canonical layer subtrees (`sources/`, `records/`, `wiki/`) — the
687    /// same store model [`Store::walk`] uses — rather than walking from
688    /// `self.root`. Walking from root would descend into non-layer top-level
689    /// dirs (`EXPECTED/` test goldens, an `archive/` of frozen index copies,
690    /// any sibling folder holding store-relative `path`s), pulling their
691    /// sidecars in and returning every record twice. A non-existent layer
692    /// subtree yields no sidecars rather than walking a missing path.
693    fn find_type_index_files_in(&self, layer: Option<Layer>) -> Result<Vec<PathBuf>, StoreError> {
694        // Store-wide read: union the per-layer scoped reads so only the three
695        // content layers are walked (never root meta files or non-layer dirs),
696        // matching `Store::walk`. The per-layer paths are disjoint by folder, so
697        // a plain concatenation preserves completeness.
698        let Some(layer) = layer else {
699            let mut out = Vec::new();
700            for l in Layer::all() {
701                out.extend(self.find_type_index_files_in(Some(l))?);
702            }
703            out.sort();
704            return Ok(out);
705        };
706        let walk_root = self.root.join(layer.dir_name());
707        // A scoped walk over a layer folder that does not exist yet must be an
708        // empty result, mirroring `walk_layer`'s missing-dir guard — not a walk
709        // error from `ignore` over a nonexistent path.
710        if !walk_root.is_dir() {
711            return Ok(Vec::new());
712        }
713        let mut out = Vec::new();
714        let mut builder = WalkBuilder::new(&walk_root);
715        builder
716            .standard_filters(false)
717            .hidden(true)
718            .follow_links(true);
719        for entry in builder.build() {
720            let entry = entry.map_err(|e| StoreError::Search {
721                root: walk_root.clone(),
722                message: e.to_string(),
723            })?;
724            if !is_file_entry(&entry) {
725                continue;
726            }
727            let path = entry.path();
728            if path.file_name().and_then(|n| n.to_str()) != Some(TYPE_INDEX_FILE) {
729                continue;
730            }
731            if self.is_in_log_dir(path) {
732                continue;
733            }
734            if let Some(rel) = self.rel_path(path) {
735                out.push(rel);
736            }
737        }
738        out.sort();
739        Ok(out)
740    }
741
742    /// A `WalkBuilder` configured for db.md SWEEPs: gitignore/global-ignore are
743    /// OFF (a SWEEP must see every file even if the store is a git repo with a
744    /// `.gitignore`), but hidden files/dirs are skipped. Symlinks are
745    /// **followed** (`follow_links(true)`) so a symlinked `.md` content file or
746    /// a symlinked type folder (e.g. `records/companies -> /other/disk/...`) is
747    /// walked like any other content rather than silently vanishing; a symlinked
748    /// layer dir was already traversed (the walk root is followed), so following
749    /// symlinks one level deeper just removes that inconsistency.
750    fn md_walker(&self, root: &Path) -> WalkBuilder {
751        let mut builder = WalkBuilder::new(root);
752        builder
753            .standard_filters(false)
754            .hidden(true)
755            .follow_links(true);
756        builder
757    }
758
759    /// True if an absolute path lives under the store's root-level `log/`
760    /// rotation-archive directory.
761    fn is_in_log_dir(&self, abs: &Path) -> bool {
762        match self.rel_path(abs) {
763            Some(rel) => rel.components().next().map(|c| c.as_os_str()) == Some("log".as_ref()),
764            None => false,
765        }
766    }
767
768    /// Read a file's frontmatter `updated` field as an RFC3339 timestamp,
769    /// returning `None` when absent/unparseable. A self-contained reader (does
770    /// not depend on the not-yet-implemented `parser::read_file`); parses the
771    /// leading `---`-fenced YAML block with the same engine the parser uses.
772    fn read_updated(&self, abs: &Path) -> Option<DateTime<FixedOffset>> {
773        let text = std::fs::read_to_string(abs).ok()?;
774        let yaml = frontmatter_block(&text)?;
775        let value: serde_norway::Value = serde_norway::from_str(yaml).ok()?;
776        let raw = value.get("updated")?;
777        value_to_datetime(raw)
778    }
779
780    /// The `<YYYY>/<MM>` shard segment for a sharding type, from its primary
781    /// date field with a `created` fallback. Reads the public `Frontmatter`
782    /// fields directly. `None` when no usable date is present.
783    fn primary_shard_segment(&self, type_: &str, fm: &Frontmatter) -> Option<(String, String)> {
784        // Try the type's primary date field first.
785        if let Some(field) = primary_date_field(type_) {
786            if let Some(v) = fm.extra.get(field) {
787                if let Some(seg) = value_to_year_month(v) {
788                    return Some(seg);
789                }
790            }
791        }
792        // Universal fallback: the typed `created` timestamp.
793        fm.created
794            .map(|dt| (format!("{:04}", dt.year()), format!("{:02}", dt.month())))
795    }
796}
797
798// ── Path containment (security) ─────────────────────────────────────────────
799
/// The single within-store containment gate: resolve `candidate` and return
/// the resolved path only if it lies inside `store_root`.
///
/// Any caller-influenced path (a wiki-link target, a rename destination, …)
/// must pass through here before it is read or traversed, so a `..`-laden or
/// symlink-escaping target can never become a read outside the store.
///
/// Steps:
/// 1. **Reject `..` in the untrusted tail.** The trusted `store_root` prefix
///    is stripped lexically first, so a root such as `../../some/store` may
///    carry its own `..`; a candidate that does not begin with `store_root`
///    is scrutinized in full. `..` cannot be normalized lexically once a
///    symlink may sit earlier in the path, so it is refused outright.
/// 2. **Canonicalize the root**, so both sides of the final comparison live in
///    the same fully-resolved namespace (e.g. macOS `/tmp` → `/private/tmp`).
/// 3. **Resolve the candidate as far as it exists.** Trailing components are
///    peeled off until the remaining prefix canonicalizes (resolving symlinks
///    in the existing parent chain), then re-appended. If no prefix exists at
///    all, the remainder is anchored at the canonical root.
/// 4. **Containment check** with `starts_with` on the resolved path.
///
/// # Errors
/// * `PermissionDenied` when the tail contains `..` or the resolved path lies
///   outside the store root.
/// * Whatever `canonicalize` reports for `store_root` itself.
pub fn ensure_path_within_store(store_root: &Path, candidate: &Path) -> std::io::Result<PathBuf> {
    use std::io::{Error, ErrorKind};
    use std::path::Component;

    // Step 1: only the caller-influenced tail is scrutinized for `..`.
    let untrusted_tail = match candidate.strip_prefix(store_root) {
        Ok(tail) => tail,
        Err(_) => candidate,
    };
    for component in untrusted_tail.components() {
        if component == Component::ParentDir {
            return Err(Error::new(
                ErrorKind::PermissionDenied,
                format!(
                    "path {} contains a `..` component beyond the store root {} and cannot be contained",
                    candidate.display(),
                    store_root.display()
                ),
            ));
        }
    }

    // Step 2: this also resolves any `..` the root itself carries.
    let canonical_root = store_root.canonicalize()?;

    // Step 3: peel not-yet-existing trailing components until something on
    // disk canonicalizes; remember the peeled names (innermost first).
    let mut probe = candidate.to_path_buf();
    let mut peeled: Vec<std::ffi::OsString> = Vec::new();
    let anchor = loop {
        if let Ok(real) = probe.canonicalize() {
            break real;
        }
        let Some(leaf) = probe.file_name().map(|name| name.to_os_string()) else {
            // A root/prefix component with no file name and no on-disk
            // existence: judge the remainder against the canonical root.
            break canonical_root.clone();
        };
        peeled.push(leaf);
        if !probe.pop() {
            // Ran out of components without finding an existing prefix, so a
            // relative candidate is anchored at the store, not the CWD.
            break canonical_root.clone();
        }
    };

    // Re-append the still-virtual tail in its original (outermost-first) order.
    let mut resolved = anchor;
    resolved.extend(peeled.into_iter().rev());

    // Step 4.
    if !resolved.starts_with(&canonical_root) {
        return Err(Error::new(
            ErrorKind::PermissionDenied,
            format!(
                "path {} resolves outside the store root {}",
                candidate.display(),
                store_root.display()
            ),
        ));
    }
    Ok(resolved)
}
908
909// ── The shared wiki-link edge notion (graph / stats / validate / rename) ─────
910//
911// One definition of "what `[[...]]` text is a real edge" that every relationship
912// op keys on, so `forwardlinks`, `backlinks`, `links`, `stats`, and `rename`
913// never disagree with each other (or with `validate`'s body extractor):
914//
915//   1. **Fence-aware.** A `[[...]]` inside a ``` / ~~~ fenced code block is a
916//      documentation example, not an edge — exactly `validate`'s rule. Counting
917//      it as an edge over-reports backlinks, falsely un-orphans the page, and
918//      (worst) lets `rename` rewrite verbatim example text.
919//   2. **Whitespace-trimmed.** `[[ records/contacts/sarah ]]` is the same edge
920//      as `[[records/contacts/sarah]]`. The inner padding is cosmetic; both the
921//      forward and the backward view must resolve it identically.
922//   3. **Case-folded to the filesystem.** Link *resolution* is `is_file()`,
923//      which is case-insensitive on macOS/Windows. So on a case-insensitive
924//      filesystem `[[records/contacts/Sarah-Chen]]` and the on-disk
925//      `sarah-chen.md` are the SAME edge; the comparison key must case-fold to
926//      match, or backlinks/rename silently miss the link while validate (which
927//      resolves via the filesystem) considers it fine.
928
/// Reduce the raw inner text of a `[[...]]` link to its canonical wiki-link
/// key, the one string forward and backward edges are compared on.
///
/// Applied in order: trim surrounding whitespace, turn `\` into `/`, drop any
/// run of leading `./`, drop all leading `/`, drop one trailing `.md`, and
/// trim once more. Case is preserved; pair with [`link_edge_key`] for the
/// case-fold step.
pub fn canonical_link_target(raw: &str) -> String {
    let normalized = raw.trim().replace('\\', "/");
    let mut key: &str = &normalized;
    while let Some(stripped) = key.strip_prefix("./") {
        key = stripped;
    }
    key = key.trim_start_matches('/');
    if let Some(stem) = key.strip_suffix(".md") {
        key = stem;
    }
    key.trim().to_string()
}
942
943/// The comparison key for a canonical link target: identity on a case-sensitive
944/// filesystem, ASCII-lowercased on a case-insensitive one (macOS/Windows), so
945/// the string-keyed edge comparison agrees with the filesystem's case-folding
946/// `is_file()` resolution. Callers compare `link_edge_key(a) == link_edge_key(b)`.
947pub fn link_edge_key(canonical_target: &str) -> String {
948    if fs_is_case_insensitive() {
949        canonical_target.to_ascii_lowercase()
950    } else {
951        canonical_target.to_string()
952    }
953}
954
955/// Extract every wiki-link edge target from a markdown body, fence-aware and
956/// whitespace-trimmed, in document order (duplicates kept — callers dedup).
957/// Returns canonical targets (see [`canonical_link_target`]); the case-fold for
958/// comparison is applied separately via [`link_edge_key`] so the canonical form
959/// (used for rewrites/output) stays case-preserving.
960///
961/// Scans line-by-line tracking the fence state inline (no whole-body
962/// allocation), exactly mirroring validate's `extract_wiki_links`: the fence
963/// state is a `(fence char, run length)` tracked via [`fence_opens`] /
964/// [`fence_closes`] — NOT a bool toggled on any ``` / `~~~` line. The naive
965/// toggle inverts mid-block when a `~~~` block legally contains a ```` ``` ````
966/// line (the standard way to document a backtick fence), or when a `>3`-space-
967/// indented ``` is mistaken for a fence — both of which would let a fenced
968/// example `[[…]]` leak out as a live edge (a false dependent for
969/// backlinks/rename). Fenced lines never yield edges. Within a line, the text
970/// before the first `|` is the target; a target whose trimmed form starts with
971/// `[` is the rejected triple-bracket flow-form list mis-encoding
972/// (`[[[a]], [[b]]]`), not a real link — skipped, matching validate.
973pub fn extract_edge_targets(body: &str) -> Vec<String> {
974    let mut out = Vec::new();
975    let mut fence: Option<(u8, usize)> = None;
976    for line in body.lines() {
977        let content = line.trim_end_matches('\r');
978        if let Some(f) = fence {
979            if fence_closes(content, f) {
980                fence = None;
981            }
982            continue;
983        }
984        if let Some(opened) = fence_opens(content) {
985            fence = Some(opened);
986            continue;
987        }
988        let bytes = line.as_bytes();
989        let mut i = 0usize;
990        while i + 1 < bytes.len() {
991            if bytes[i] == b'[' && bytes[i + 1] == b'[' {
992                if let Some(close) = line[i + 2..].find("]]") {
993                    let inner = &line[i + 2..i + 2 + close];
994                    let raw_target = inner.split('|').next().unwrap_or(inner).trim();
995                    if !raw_target.is_empty() && !raw_target.starts_with('[') {
996                        let canonical = canonical_link_target(raw_target);
997                        if !canonical.is_empty() {
998                            out.push(canonical);
999                        }
1000                    }
1001                    i = i + 2 + close + 2;
1002                    continue;
1003                }
1004            }
1005            i += 1;
1006        }
1007    }
1008    out
1009}
1010
/// Decide whether `line` opens a fenced code block and, if so, report the
/// fence as `(fence byte, run length)`.
///
/// A line opens a fence when, after at most three leading spaces, it starts
/// with a run of three or more backticks or three or more tildes. For a
/// backtick fence the remainder of the line (the info string) must not
/// contain another backtick. Anything else returns `None`.
pub fn fence_opens(line: &str) -> Option<(u8, usize)> {
    let rest = line.trim_start_matches(' ');
    if line.len() - rest.len() > 3 {
        return None;
    }
    let byte = *rest.as_bytes().first()?;
    if !matches!(byte, b'`' | b'~') {
        return None;
    }
    // `byte` is ASCII here, so counting bytes equals counting fence chars.
    let run = rest.bytes().take_while(|&b| b == byte).count();
    if run < 3 {
        return None;
    }
    let info = &rest[run..];
    if byte == b'`' && info.contains('`') {
        return None;
    }
    Some((byte, run))
}
1037
/// Decide whether `line` closes the currently open `fence`.
///
/// `fence` is the `(fence byte, run length)` pair of the opener. A closing
/// line has at most three leading spaces, then a run of the *same* fence
/// character at least as long as the opener's, then nothing but whitespace.
/// A fence of the other character therefore never closes the block.
pub fn fence_closes(line: &str, fence: (u8, usize)) -> bool {
    let (byte, open_len) = fence;
    let rest = line.trim_start_matches(' ');
    let indent = line.len() - rest.len();
    let after_run = rest.trim_start_matches(char::from(byte));
    let run = rest.len() - after_run.len();
    indent <= 3 && run >= open_len && after_run.trim().is_empty()
}
1056
/// Report whether the filesystem holding the OS temp dir resolves paths
/// case-insensitively.
///
/// The probe runs at most once per process (cached in a `OnceLock`): it
/// creates a lowercase marker file named after the pid and current time, asks
/// whether the all-uppercase spelling of that name is a file, and then
/// removes the marker (removal errors are ignored). If the marker cannot be
/// created the answer is `false`, i.e. case-sensitive, so a temp-dir problem
/// never widens matching.
fn fs_is_case_insensitive() -> bool {
    static PROBED: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
    *PROBED.get_or_init(|| {
        let stamp = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map_or(0, |elapsed| elapsed.as_nanos());
        let pid = std::process::id();
        let tmp = std::env::temp_dir();
        let lower = tmp.join(format!(".dbmd-case-probe-{pid}-{stamp}"));
        let upper = tmp.join(format!(".DBMD-CASE-PROBE-{pid}-{stamp}"));
        // If the uppercase spelling resolves to the file just created, the
        // filesystem folded the case.
        let folded = std::fs::File::create(&lower).is_ok() && upper.is_file();
        let _ = std::fs::remove_file(&lower);
        folded
    })
}
1084
1085// ── Free helpers (no `self`) ────────────────────────────────────────────────
1086
1087/// True if a walk entry is a regular file, **following symlinks** so a
1088/// symlinked `.md` content file (or a file inside a symlinked type folder) is
1089/// counted like any other content file.
1090///
1091/// The store walks enable `follow_links(true)`, so a symlink entry's
1092/// `file_type()` still reports `is_symlink()` (the `ignore` walker does not
1093/// rewrite the entry's own type), not the followed target's type. Treat a
1094/// symlink whose target is a regular file as a file: `stat` (follow) the path
1095/// and check. A broken symlink (no target) is not a file.
1096fn is_file_entry(entry: &ignore::DirEntry) -> bool {
1097    match entry.file_type() {
1098        Some(ft) if ft.is_file() => true,
1099        Some(ft) if ft.is_symlink() => std::fs::metadata(entry.path())
1100            .map(|m| m.is_file())
1101            .unwrap_or(false),
1102        // A `None` file type (the walk root itself) or a non-file/non-symlink
1103        // entry is not a content file.
1104        _ => false,
1105    }
1106}
1107
/// Whether the path's extension is exactly `md`. The comparison is
/// case-sensitive, so `.MD` does not qualify.
fn has_md_extension(path: &Path) -> bool {
    matches!(path.extension().and_then(|ext| ext.to_str()), Some("md"))
}
1113
1114/// True if the basename is a non-content meta file (`DB.md`, `index.md`,
1115/// `log.md`) that the content walks must skip.
1116fn is_non_content_basename(path: &Path) -> bool {
1117    match path.file_name().and_then(|n| n.to_str()) {
1118        Some(name) => NON_CONTENT_BASENAMES.contains(&name),
1119        None => false,
1120    }
1121}
1122
/// Return `name` with a `.md` suffix guaranteed: a name already ending in
/// `.md` is copied unchanged, anything else gets `.md` appended.
fn ensure_md_extension(name: &str) -> String {
    let mut file_name = String::from(name);
    if !file_name.ends_with(".md") {
        file_name.push_str(".md");
    }
    file_name
}
1131
/// The default store-relative folder for objects of `type_`.
///
/// Recognized types map to a fixed folder from the table below. Any other
/// type falls back to `records/<type>` using the bare type name verbatim — no
/// pluralization guess (e.g. `concept` → `records/concept`).
///
/// NOTE(review): there is no `wiki-page` entry, so that type currently
/// resolves to `records/wiki-page`, while `layer_for_type`'s documentation
/// expects it in the wiki layer — confirm which is intended.
fn default_type_folder(type_: &str) -> PathBuf {
    const KNOWN_FOLDERS: &[(&str, &str)] = &[
        // sources — documentary
        ("email", "sources/emails"),
        ("transcript", "sources/transcripts"),
        ("pdf-source", "sources/docs"),
        // sources — testimonial (a human told the agent X)
        ("note", "sources/notes"),
        // records — entities
        ("contact", "records/contacts"),
        ("company", "records/companies"),
        // records — events
        ("expense", "records/expenses"),
        ("meeting", "records/meetings"),
        ("decision", "records/decisions"),
        ("invoice", "records/invoices"),
    ];

    KNOWN_FOLDERS
        .iter()
        .find(|(name, _)| *name == type_)
        .map(|(_, folder)| PathBuf::from(folder))
        // Unrecognized: the bare type name under `records/`.
        .unwrap_or_else(|| PathBuf::from("records").join(type_))
}
1158
1159/// The canonical [`Layer`] a `type_` belongs to, derived from its default
1160/// type-folder (`email` → `Sources`, `contact` → `Records`, `wiki-page` →
1161/// `Wiki`, unrecognized → `Records`). The write path uses this to decide whether
1162/// an agent-supplied folder is in the *right* layer for the type before honouring
1163/// its sub-folder choice.
1164pub fn layer_for_type(type_: &str) -> Layer {
1165    layer_of_folder(&default_type_folder(type_)).unwrap_or(Layer::Records)
1166}
1167
1168/// The [`Layer`] a type-folder path lives in, read from its first component
1169/// (`sources/` → `Sources`, `records/` → `Records`, `wiki/` → `Wiki`). Used to
1170/// bound [`Store::find_by_type`]'s whole-layer sidecar read to a single layer
1171/// subtree. Returns `None` for a path with no recognized layer prefix; every
1172/// value [`default_type_folder`] produces has one, so in practice this is
1173/// always `Some` on the call path — `None` degrades to a store-wide read.
1174fn layer_of_folder(folder: &Path) -> Option<Layer> {
1175    let first = folder.components().next()?.as_os_str().to_str()?;
1176    Layer::from_dir_name(first)
1177}
1178
/// Infer a content file's canonical `type` from its store-relative path — the
/// inverse of [`default_type_folder`] and the single source of truth for
/// path→type inference.
///
/// The path must have the canonical `<layer>/<type-folder>/<file>` shape: at
/// least three components, led by one of the layers `sources`, `records`, or
/// `wiki`. A shorter path (a file directly under a layer) or any other
/// leading component yields `None`.
///
/// * `wiki/<topic>/…` always infers `wiki-page`: every wiki page is filed
///   under a topic folder, so the folder name carries no type information.
/// * Recognized `(layer, folder)` pairs map back to their canonical type
///   (`sources/emails` → `email`, `records/contacts` → `contact`, …).
/// * Any other folder infers the **bare folder name verbatim** — no
///   pluralization or singularization — so it round-trips with
///   `default_type_folder`'s unrecognized fallback (`task` ⇄ `records/task`).
///
/// Components that are not valid UTF-8 are skipped when counting.
pub fn infer_type_from_path(rel: &Path) -> Option<String> {
    let mut comps = rel.components().filter_map(|c| c.as_os_str().to_str());
    let layer = comps.next()?;
    // `wiki` is a real layer: rejecting it here made every wiki page
    // un-inferable, contradicting the documented `wiki-page` rule.
    if !matches!(layer, "sources" | "records" | "wiki") {
        return None;
    }
    let folder = comps.next()?;
    // The file itself must be a third component (a real type-folder, not a
    // file sitting directly under the layer).
    comps.next()?;

    let mapped = match (layer, folder) {
        // Every topic folder under `wiki/` holds wiki pages.
        ("wiki", _) => "wiki-page",
        ("sources", "emails") => "email",
        ("sources", "transcripts") => "transcript",
        ("sources", "docs") => "pdf-source",
        ("sources", "notes") => "note",
        ("records", "contacts") => "contact",
        ("records", "companies") => "company",
        ("records", "expenses") => "expense",
        ("records", "meetings") => "meeting",
        ("records", "decisions") => "decision",
        ("records", "invoices") => "invoice",
        // Unrecognized folder: the bare name, verbatim, so the round-trip
        // with `default_type_folder`'s fallback holds.
        (_, other) => other,
    };
    Some(mapped.to_string())
}
1223
/// Name of the frontmatter field whose value drives the `<YYYY>/<MM>` shard
/// for `type_`. `None` means the type has no dedicated date field and only
/// the `created` fallback applies.
fn primary_date_field(type_: &str) -> Option<&'static str> {
    let field = match type_ {
        "email" | "expense" | "invoice" | "meeting" => "date",
        "transcript" => "recorded_at",
        "pdf-source" => "received_at",
        "note" => "told_at",
        // Every other type (custom event types included) has no canonical
        // date field name.
        _ => return None,
    };
    Some(field)
}
1238
1239/// Parse a YAML value into an RFC3339 [`DateTime`], accepting both an explicit
1240/// string and a YAML-native scalar rendered to string.
1241fn value_to_datetime(value: &serde_norway::Value) -> Option<DateTime<FixedOffset>> {
1242    let s = yaml_scalar_string(value)?;
1243    DateTime::parse_from_rfc3339(s.trim()).ok()
1244}
1245
1246/// Extract `(YYYY, MM)` from a YAML date/timestamp value. Lenient: matches a
1247/// leading `YYYY-MM` so a bare `2026-05-22` date and a full
1248/// `2026-05-22T10:00:00-07:00` timestamp both work.
1249fn value_to_year_month(value: &serde_norway::Value) -> Option<(String, String)> {
1250    let s = yaml_scalar_string(value)?;
1251    year_month_from_str(s.trim())
1252}
1253
/// Parse the leading `YYYY-MM` of a date string into `(YYYY, MM)`.
///
/// Requires exactly four ASCII digits, a `-`, and two ASCII digits at the
/// start of `s`, with the month in `01..=12`; whatever follows is ignored.
/// Hand-rolled rather than regex-based to keep a regex compile off the write
/// path.
fn year_month_from_str(s: &str) -> Option<(String, String)> {
    let head = s.as_bytes().get(..7)?;
    let well_formed = head.iter().enumerate().all(|(idx, &b)| {
        if idx == 4 {
            b == b'-'
        } else {
            b.is_ascii_digit()
        }
    });
    if !well_formed {
        return None;
    }
    let month = (head[5] - b'0') * 10 + (head[6] - b'0');
    if month == 0 || month > 12 {
        return None;
    }
    // The first seven bytes are ASCII, so these slices fall on char boundaries.
    Some((s[..4].to_owned(), s[5..7].to_owned()))
}
1279
1280/// Render a YAML scalar as a string: a real `String` verbatim, otherwise the
1281/// value's compact YAML serialization (covers timestamps that the YAML engine
1282/// may surface as a non-string scalar).
1283fn yaml_scalar_string(value: &serde_norway::Value) -> Option<String> {
1284    if let Some(s) = value.as_str() {
1285        return Some(s.to_string());
1286    }
1287    match value {
1288        serde_norway::Value::Null => None,
1289        serde_norway::Value::Mapping(_) | serde_norway::Value::Sequence(_) => None,
1290        other => serde_norway::to_string(other)
1291            .ok()
1292            .map(|s| s.trim().to_string()),
1293    }
1294}
1295
/// Slice out the YAML frontmatter of a file: the text strictly between the
/// opening `---` line and the next `---` line.
///
/// A leading UTF-8 BOM is tolerated, and fence lines may carry trailing
/// `\r` (CRLF files). The opening fence must be the very first line. The
/// returned slice keeps the line terminators of the lines it contains.
///
/// Returns `None` when the file does not open with a `---` line or when no
/// closing `---` line follows.
fn frontmatter_block(text: &str) -> Option<&str> {
    let body = text.strip_prefix('\u{feff}').unwrap_or(text);

    // First line (without its '\n') and everything after it.
    let (opening, block) = body.split_once('\n').unwrap_or((body, ""));
    if opening.trim_end_matches('\r') != "---" {
        return None;
    }

    // Walk the remaining lines, tracking how many bytes precede the current
    // one, so the block is everything before the closing fence.
    let mut consumed = 0usize;
    for raw in block.split_inclusive('\n') {
        let line = raw.strip_suffix('\n').unwrap_or(raw);
        if line.trim_end_matches('\r') == "---" {
            return Some(&block[..consumed]);
        }
        consumed += raw.len();
    }
    // Reached end of input without a closing fence.
    None
}
1327
/// Break `s` at its first `\n`: returns the text before the newline and the
/// text after it, with the newline itself dropped. When `s` contains no
/// newline the whole input is the line and the remainder is `""`.
fn split_first_line(s: &str) -> (&str, &str) {
    s.split_once('\n').unwrap_or((s, ""))
}
1337
1338/// True if an [`IndexRecord`] has a field `key` equal to `value`, checking the
1339/// typed columns first and then the flattened `fields` map.
1340fn record_matches_field(record: &IndexRecord, key: &str, value: &str) -> bool {
1341    match key {
1342        "type" => record.type_ == value,
1343        "summary" => record.summary == value,
1344        "path" => record.path.to_string_lossy() == value,
1345        "created" => timestamp_matches(record.created, value),
1346        "updated" => timestamp_matches(record.updated, value),
1347        "tags" => record.tags.iter().any(|t| t == value),
1348        "links" => record.links.iter().any(|l| l == value),
1349        other => record
1350            .fields
1351            .get(other)
1352            .map(|v| json_value_matches(v, value))
1353            .unwrap_or(false),
1354    }
1355}
1356
1357/// Compare a record's `created`/`updated` instant against a query `value`.
1358///
1359/// db.md files write timestamps in several equivalent RFC3339 spellings — most
1360/// commonly the `Z` UTC designator (`2026-05-01T00:00:00Z`) but also an explicit
1361/// offset (`...+00:00`, `...-07:00`). A naive `record.created.to_rfc3339() ==
1362/// value` reformats only one side: chrono renders a UTC instant as `+00:00`, so
1363/// the `Z` form an agent reads straight out of the file would never match. We
1364/// instead parse `value` as RFC3339 and compare instants, where `Z` and `+00:00`
1365/// (and any same-instant offset) are equal. A `value` that is not valid RFC3339
1366/// can never equal a real timestamp, so it falls through to `false`.
1367fn timestamp_matches(stored: Option<DateTime<FixedOffset>>, value: &str) -> bool {
1368    match (stored, DateTime::parse_from_rfc3339(value)) {
1369        (Some(stored), Ok(queried)) => stored == queried,
1370        _ => false,
1371    }
1372}
1373
1374/// Compare a JSON field value against a query string. A string matches
1375/// verbatim; scalars match their textual form; an array matches if any element
1376/// matches (so a list-valued frontmatter field is membership-queried).
1377fn json_value_matches(v: &serde_json::Value, value: &str) -> bool {
1378    match v {
1379        serde_json::Value::String(s) => s == value,
1380        serde_json::Value::Bool(b) => b.to_string() == value,
1381        serde_json::Value::Number(n) => n.to_string() == value,
1382        serde_json::Value::Array(items) => items.iter().any(|i| json_value_matches(i, value)),
1383        // A present-but-null field never matches — consistent with the in-memory
1384        // post-filter (`query::json_value_matches`, which the first `where`
1385        // clause is NOT re-checked against, so the two must agree here or a
1386        // `--where field=` query would return different rows than `--type X
1387        // --where field=`).
1388        serde_json::Value::Null => false,
1389        serde_json::Value::Object(_) => false,
1390    }
1391}
1392
1393#[cfg(test)]
1394mod tests {
1395    use super::*;
1396    use std::fs;
1397    use tempfile::{tempdir, TempDir};
1398
1399    // ── Fixtures ────────────────────────────────────────────────────────────
1400
1401    /// Write `contents` to `<root>/<rel>`, creating parent dirs. Returns the
1402    /// store-relative path for convenient assertions.
1403    fn write(root: &Path, rel: &str, contents: &str) -> PathBuf {
1404        let abs = root.join(rel);
1405        fs::create_dir_all(abs.parent().unwrap()).unwrap();
1406        fs::write(&abs, contents).unwrap();
1407        PathBuf::from(rel)
1408    }
1409
1410    /// A minimal content file with the given `updated` timestamp in frontmatter.
1411    fn content_md(updated: &str) -> String {
1412        format!(
1413            "---\ntype: note\ncreated: {updated}\nupdated: {updated}\nsummary: a note\n---\n\nbody\n"
1414        )
1415    }
1416
1417    /// A bare directory with a `DB.md` marker (valid `db-md` frontmatter so the
1418    /// real parser is exercised).
1419    fn empty_store() -> TempDir {
1420        let dir = tempdir().unwrap();
1421        fs::write(
1422            dir.path().join("DB.md"),
1423            "---\ntype: db-md\nscope: company\nowner: Test\n---\n\n# Store\n",
1424        )
1425        .unwrap();
1426        dir
1427    }
1428
1429    /// Open a store rooted at a TempDir; panics if `open` rejects it.
1430    fn open(dir: &TempDir) -> Store {
1431        Store::open(dir.path()).expect("fixture should be a valid store")
1432    }
1433
1434    fn rels(paths: &[PathBuf]) -> Vec<String> {
1435        paths
1436            .iter()
1437            .map(|p| p.to_string_lossy().replace('\\', "/"))
1438            .collect()
1439    }
1440
1441    // ── Layer ───────────────────────────────────────────────────────────────
1442
1443    #[test]
1444    fn layer_dir_name_and_parse_are_inverse() {
1445        for layer in Layer::all() {
1446            assert_eq!(Layer::from_dir_name(layer.dir_name()), Some(layer));
1447        }
1448        assert_eq!(Layer::Sources.dir_name(), "sources");
1449        assert_eq!(Layer::Records.dir_name(), "records");
1450        // `wiki` is no longer a layer (the wiki/ layer was removed); it parses to None.
1451        assert_eq!(Layer::from_dir_name("wiki"), None);
1452        assert_eq!(Layer::from_dir_name("log"), None);
1453        assert_eq!(Layer::from_dir_name("Sources"), None); // case-sensitive
1454    }
1455
1456    #[test]
1457    fn layer_order_is_canonical() {
1458        // stats keys a BTreeMap on Layer; the sort order must be sources<records.
1459        let mut v = [Layer::Records, Layer::Sources];
1460        v.sort();
1461        assert_eq!(v, [Layer::Sources, Layer::Records]);
1462    }
1463
1464    // ── is_db_md_store / open ────────────────────────────────────────────────
1465
1466    #[test]
1467    fn is_store_true_only_with_uppercase_marker() {
1468        let dir = tempdir().unwrap();
1469        assert!(
1470            !Store::is_db_md_store(dir.path()),
1471            "no marker → not a store"
1472        );
1473
1474        fs::write(dir.path().join("DB.md"), "---\ntype: db-md\n---\n").unwrap();
1475        assert!(Store::is_db_md_store(dir.path()), "uppercase DB.md → store");
1476    }
1477
1478    #[test]
1479    fn is_store_false_for_lowercase_db_md() {
1480        // The case-sensitivity contract: a lowercase db.md is the spec name, not
1481        // a marker — even on a case-insensitive filesystem where Path::exists
1482        // would lie. This test must pass on macOS (case-insensitive) too.
1483        let dir = tempdir().unwrap();
1484        fs::write(dir.path().join("db.md"), "---\ntype: db-md\n---\n").unwrap();
1485        assert!(
1486            !Store::is_db_md_store(dir.path()),
1487            "lowercase db.md must NOT be treated as a store marker"
1488        );
1489        assert!(Store::open(dir.path()).is_err());
1490    }
1491
1492    #[test]
1493    fn is_store_false_when_db_md_is_a_directory() {
1494        let dir = tempdir().unwrap();
1495        fs::create_dir(dir.path().join("DB.md")).unwrap();
1496        assert!(
1497            !Store::is_db_md_store(dir.path()),
1498            "a directory named DB.md is not the file marker"
1499        );
1500    }
1501
1502    #[test]
1503    fn open_rejects_non_store_with_path() {
1504        let dir = tempdir().unwrap();
1505        let err = Store::open(dir.path()).unwrap_err();
1506        assert_eq!(err.path, dir.path());
1507    }
1508
1509    #[test]
1510    fn open_succeeds_and_parses_config() {
1511        let dir = tempdir().unwrap();
1512        // A DB.md whose ## Policies declares a frozen page — proves open()
1513        // actually parsed the config rather than substituting a default.
1514        fs::write(
1515            dir.path().join("DB.md"),
1516            "---\ntype: db-md\nscope: company\nowner: Test\n---\n\n# Store\n\n\
1517             ## Policies\n\n### Frozen pages\n- records/decisions/q1.md\n",
1518        )
1519        .unwrap();
1520        let store = Store::open(dir.path()).unwrap();
1521        assert_eq!(store.root, dir.path());
1522        assert!(
1523            store
1524                .config
1525                .frozen_pages
1526                .iter()
1527                .any(|p| p == Path::new("records/decisions/q1.md")),
1528            "open() must surface DB.md ## Policies, got {:?}",
1529            store.config.frozen_pages
1530        );
1531    }
1532
1533    // ── walk / walk_layer / walk_type_folder ─────────────────────────────────
1534
1535    #[test]
1536    fn walk_collects_content_across_layers_skipping_meta_and_log() {
1537        let dir = empty_store();
1538        let root = dir.path();
1539        write(
1540            root,
1541            "sources/emails/2026/05/a.md",
1542            &content_md("2026-05-01T00:00:00Z"),
1543        );
1544        write(
1545            root,
1546            "records/contacts/sarah.md",
1547            &content_md("2026-05-02T00:00:00Z"),
1548        );
1549        write(
1550            root,
1551            "records/profiles/sarah.md",
1552            &content_md("2026-05-03T00:00:00Z"),
1553        );
1554        // Things walk() must SKIP:
1555        write(root, "sources/emails/index.md", "---\ntype: index\n---\n"); // catalog
1556        write(root, "index.md", "---\ntype: index\n---\n"); // root catalog
1557        write(root, "log.md", "---\ntype: log\n---\n"); // log
1558        write(root, "log/2026-04.md", "---\ntype: log\n---\n"); // rotated log archive
1559        write(
1560            root,
1561            "sources/.hidden/secret.md",
1562            &content_md("2026-05-09T00:00:00Z"),
1563        ); // hidden dir
1564        write(root, "records/contacts/notes.txt", "not markdown"); // non-md
1565
1566        let store = open(&dir);
1567        let got = rels(&store.walk().unwrap());
1568        assert_eq!(
1569            got,
1570            vec![
1571                "records/contacts/sarah.md".to_string(),
1572                "records/profiles/sarah.md".to_string(),
1573                "sources/emails/2026/05/a.md".to_string(),
1574            ]
1575        );
1576    }
1577
1578    #[test]
1579    fn walk_includes_content_named_log_md_or_db_md_inside_a_layer() {
1580        let dir = empty_store();
1581        let root = dir.path();
1582        // A content file that merely happens to be named log.md / DB.md INSIDE a
1583        // layer is real content — those names are reserved only at the store root.
1584        write(
1585            root,
1586            "records/configs/log.md",
1587            &content_md("2026-05-01T00:00:00Z"),
1588        );
1589        write(
1590            root,
1591            "sources/docs/DB.md",
1592            &content_md("2026-05-02T00:00:00Z"),
1593        );
1594        // The derived catalog twin is still skipped at any depth.
1595        write(root, "records/configs/index.md", "---\ntype: index\n---\n");
1596        let store = open(&dir);
1597        let got = rels(&store.walk().unwrap());
1598        assert!(
1599            got.contains(&"records/configs/log.md".to_string()),
1600            "layer-internal log.md is content: {got:?}"
1601        );
1602        assert!(
1603            got.contains(&"sources/docs/DB.md".to_string()),
1604            "layer-internal DB.md is content: {got:?}"
1605        );
1606        assert!(
1607            !got.iter().any(|p| p.ends_with("index.md")),
1608            "index.md is still skipped: {got:?}"
1609        );
1610    }
1611
1612    #[test]
1613    fn walk_layer_is_scoped() {
1614        let dir = empty_store();
1615        let root = dir.path();
1616        write(
1617            root,
1618            "sources/emails/2026/05/a.md",
1619            &content_md("2026-05-01T00:00:00Z"),
1620        );
1621        write(
1622            root,
1623            "records/contacts/sarah.md",
1624            &content_md("2026-05-02T00:00:00Z"),
1625        );
1626        let store = open(&dir);
1627
1628        assert_eq!(
1629            rels(&store.walk_layer(Layer::Sources).unwrap()),
1630            vec!["sources/emails/2026/05/a.md".to_string()]
1631        );
1632        assert_eq!(
1633            rels(&store.walk_layer(Layer::Records).unwrap()),
1634            vec!["records/contacts/sarah.md".to_string()]
1635        );
1636        // A layer with no directory is empty, not an error: a store with only a
1637        // sources/ tree has no records/ dir, so walking Records is empty.
1638        let only_sources = empty_store();
1639        write(
1640            only_sources.path(),
1641            "sources/emails/2026/05/a.md",
1642            &content_md("2026-05-01T00:00:00Z"),
1643        );
1644        let s2 = open(&only_sources);
1645        assert!(s2.walk_layer(Layer::Records).unwrap().is_empty());
1646    }
1647
1648    #[test]
1649    fn walk_type_folder_recurses_shards_and_accepts_abs_or_rel() {
1650        let dir = empty_store();
1651        let root = dir.path();
1652        write(
1653            root,
1654            "sources/emails/2026/05/a.md",
1655            &content_md("2026-05-01T00:00:00Z"),
1656        );
1657        write(
1658            root,
1659            "sources/emails/2026/06/b.md",
1660            &content_md("2026-06-01T00:00:00Z"),
1661        );
1662        write(root, "sources/emails/index.md", "---\ntype: index\n---\n"); // skipped
1663                                                                           // A different type folder must not leak in.
1664        write(
1665            root,
1666            "sources/docs/2026/05/c.md",
1667            &content_md("2026-05-04T00:00:00Z"),
1668        );
1669        let store = open(&dir);
1670
1671        let expected = vec![
1672            "sources/emails/2026/05/a.md".to_string(),
1673            "sources/emails/2026/06/b.md".to_string(),
1674        ];
1675        // Relative folder arg.
1676        assert_eq!(
1677            rels(&store.walk_type_folder(Path::new("sources/emails")).unwrap()),
1678            expected
1679        );
1680        // Absolute folder arg under the store resolves identically.
1681        assert_eq!(
1682            rels(
1683                &store
1684                    .walk_type_folder(&root.join("sources/emails"))
1685                    .unwrap()
1686            ),
1687            expected
1688        );
1689    }
1690
1691    // ── recent_in_type_folder ────────────────────────────────────────────────
1692
1693    #[test]
1694    fn recent_orders_by_updated_desc_then_path_and_caps() {
1695        let dir = empty_store();
1696        let root = dir.path();
1697        // newest
1698        write(
1699            root,
1700            "records/meetings/2026/05/c.md",
1701            &content_md("2026-05-03T00:00:00Z"),
1702        );
1703        // tie on updated — path asc decides (a before b)
1704        write(
1705            root,
1706            "records/meetings/2026/05/a.md",
1707            &content_md("2026-05-02T00:00:00Z"),
1708        );
1709        write(
1710            root,
1711            "records/meetings/2026/05/b.md",
1712            &content_md("2026-05-02T00:00:00Z"),
1713        );
1714        // oldest
1715        write(
1716            root,
1717            "records/meetings/2026/04/z.md",
1718            &content_md("2026-04-01T00:00:00Z"),
1719        );
1720        let store = open(&dir);
1721
1722        let all = rels(
1723            &store
1724                .recent_in_type_folder(Path::new("records/meetings"), 10)
1725                .unwrap(),
1726        );
1727        assert_eq!(
1728            all,
1729            vec![
1730                "records/meetings/2026/05/c.md".to_string(), // newest
1731                "records/meetings/2026/05/a.md".to_string(), // tie, path asc
1732                "records/meetings/2026/05/b.md".to_string(),
1733                "records/meetings/2026/04/z.md".to_string(), // oldest
1734            ]
1735        );
1736
1737        // Cap takes the n most-recent.
1738        let top2 = rels(
1739            &store
1740                .recent_in_type_folder(Path::new("records/meetings"), 2)
1741                .unwrap(),
1742        );
1743        assert_eq!(
1744            top2,
1745            vec![
1746                "records/meetings/2026/05/c.md".to_string(),
1747                "records/meetings/2026/05/a.md".to_string(),
1748            ]
1749        );
1750    }
1751
1752    #[test]
1753    fn recent_sorts_undated_files_last() {
1754        let dir = empty_store();
1755        let root = dir.path();
1756        write(
1757            root,
1758            "records/contacts/dated.md",
1759            &content_md("2026-05-01T00:00:00Z"),
1760        );
1761        // No `updated` field at all.
1762        write(
1763            root,
1764            "records/contacts/undated.md",
1765            "---\ntype: contact\nsummary: x\n---\nbody\n",
1766        );
1767        let store = open(&dir);
1768        let got = rels(
1769            &store
1770                .recent_in_type_folder(Path::new("records/contacts"), 10)
1771                .unwrap(),
1772        );
1773        assert_eq!(
1774            got,
1775            vec![
1776                "records/contacts/dated.md".to_string(),
1777                "records/contacts/undated.md".to_string(),
1778            ],
1779            "a file with a real `updated` must outrank one with none"
1780        );
1781    }
1782
1783    // ── type_shards ──────────────────────────────────────────────────────────
1784
1785    #[test]
1786    fn type_shards_classification() {
1787        let dir = empty_store();
1788        let store = open(&dir);
1789        for t in [
1790            "email",
1791            "transcript",
1792            "pdf-source",
1793            "expense",
1794            "invoice",
1795            "meeting",
1796            "order",
1797            "ticket",
1798            "transaction",
1799        ] {
1800            assert!(store.type_shards(t), "{t} should shard");
1801        }
1802        for t in [
1803            "contact",
1804            "company",
1805            "decision",
1806            "wiki-page",
1807            "index",
1808            "log",
1809            "db-md",
1810            "proposal",
1811        ] {
1812            assert!(!store.type_shards(t), "{t} should stay flat");
1813        }
1814    }
1815
1816    #[test]
1817    fn type_shards_respects_schema_directive_both_directions() {
1818        use crate::parser::{Config, Schema};
1819        let dir = empty_store();
1820        let mut store = open(&dir);
1821        let mut config = Config::default();
1822        // A CUSTOM type (not in the built-in list) opts into date-sharding —
1823        // without the schema override `type_shards` would return false for it.
1824        config.schemas.insert(
1825            "shipment".to_string(),
1826            Schema {
1827                shard: Some(true),
1828                ..Schema::default()
1829            },
1830        );
1831        // A BUILT-IN event type opts OUT (flat) — the override wins over the
1832        // built-in default.
1833        config.schemas.insert(
1834            "expense".to_string(),
1835            Schema {
1836                shard: Some(false),
1837                ..Schema::default()
1838            },
1839        );
1840        // A schema with no `shard:` directive leaves the built-in default intact.
1841        config
1842            .schemas
1843            .insert("meeting".to_string(), Schema::default());
1844        store.config = config;
1845
1846        assert!(
1847            store.type_shards("shipment"),
1848            "custom type with `shard: by-date` must shard"
1849        );
1850        assert!(
1851            !store.type_shards("expense"),
1852            "built-in event type with `shard: flat` must go flat"
1853        );
1854        assert!(
1855            store.type_shards("meeting"),
1856            "schema without a `shard:` directive keeps the built-in default"
1857        );
1858        assert!(
1859            !store.type_shards("contact"),
1860            "unconfigured entity type stays flat"
1861        );
1862    }
1863
1864    // ── shard_path_for ───────────────────────────────────────────────────────
1865
1866    fn fm_with_extra(key: &str, value: &str) -> Frontmatter {
1867        let mut fm = Frontmatter::default();
1868        fm.extra.insert(
1869            key.to_string(),
1870            serde_norway::Value::String(value.to_string()),
1871        );
1872        fm
1873    }
1874
1875    fn fm_with_created(rfc3339: &str) -> Frontmatter {
1876        Frontmatter {
1877            created: Some(DateTime::parse_from_rfc3339(rfc3339).unwrap()),
1878            ..Default::default()
1879        }
1880    }
1881
1882    #[test]
1883    fn shard_path_uses_primary_date_field_per_type() {
1884        let dir = empty_store();
1885        let store = open(&dir);
1886
1887        // expense.date → records/expenses/<YYYY>/<MM>/
1888        let p = store
1889            .shard_path_for("expense", &fm_with_extra("date", "2026-05-22"), "lunch")
1890            .unwrap();
1891        assert_eq!(p, PathBuf::from("records/expenses/2026/05/lunch.md"));
1892
1893        // email.date → sources/emails/<YYYY>/<MM>/
1894        let p = store
1895            .shard_path_for(
1896                "email",
1897                &fm_with_extra("date", "2026-11-02T09:00:00-07:00"),
1898                "e1",
1899            )
1900            .unwrap();
1901        assert_eq!(p, PathBuf::from("sources/emails/2026/11/e1.md"));
1902
1903        // transcript.recorded_at → sources/transcripts/<YYYY>/<MM>/
1904        let p = store
1905            .shard_path_for(
1906                "transcript",
1907                &fm_with_extra("recorded_at", "2025-01-15T12:00:00Z"),
1908                "t1",
1909            )
1910            .unwrap();
1911        assert_eq!(p, PathBuf::from("sources/transcripts/2025/01/t1.md"));
1912    }
1913
1914    #[test]
1915    fn shard_path_falls_back_to_created() {
1916        let dir = empty_store();
1917        let store = open(&dir);
1918        // meeting with no `date` field but a `created` timestamp.
1919        let p = store
1920            .shard_path_for(
1921                "meeting",
1922                &fm_with_created("2024-07-09T08:30:00-04:00"),
1923                "sync",
1924            )
1925            .unwrap();
1926        assert_eq!(p, PathBuf::from("records/meetings/2024/07/sync.md"));
1927    }
1928
1929    #[test]
1930    fn shard_path_primary_field_wins_over_created() {
1931        let dir = empty_store();
1932        let store = open(&dir);
1933        let mut fm = fm_with_created("2020-01-01T00:00:00Z");
1934        fm.extra.insert(
1935            "date".into(),
1936            serde_norway::Value::String("2026-05-22".into()),
1937        );
1938        let p = store.shard_path_for("expense", &fm, "x").unwrap();
1939        // The primary `date` (2026/05), not `created` (2020/01), drives the shard.
1940        assert_eq!(p, PathBuf::from("records/expenses/2026/05/x.md"));
1941    }
1942
1943    #[test]
1944    fn shard_path_flat_types_have_no_shard_segment() {
1945        let dir = empty_store();
1946        let store = open(&dir);
1947        // A contact has a `created` date, but contacts stay flat.
1948        let p = store
1949            .shard_path_for(
1950                "contact",
1951                &fm_with_created("2026-05-22T00:00:00Z"),
1952                "sarah-chen",
1953            )
1954            .unwrap();
1955        assert_eq!(p, PathBuf::from("records/contacts/sarah-chen.md"));
1956
1957        // wiki-page is now an unrecognized type: it is flat (no date shard) and
1958        // lands under the records-layer fallback folder `records/<type>` —
1959        // `records/wiki-page/<name>.md`, a conforming 3-component
1960        // `<layer>/<type-folder>/<file>` path. A 2-component path would be
1961        // invisible to the index/validate type-folder model.
1962        let p = store
1963            .shard_path_for("wiki-page", &Frontmatter::default(), "renewal-theme")
1964            .unwrap();
1965        assert_eq!(p, PathBuf::from("records/wiki-page/renewal-theme.md"));
1966    }
1967
1968    /// Regression: a type written through the toolkit's own path computation
1969    /// must land at a path the index + validate type-folder model accepts. A
1970    /// 2-component `<layer>/<file>` path is one `type_folder_of` (in both `index`
1971    /// and `validate`) treats as "no type-folder" — it would either crash
1972    /// `Index::on_write` (it tried to create `index.md` inside a file) or be
1973    /// silently dropped from every catalog by `Index::rebuild_all`. `wiki-page`
1974    /// is now an unrecognized type, so it falls back to `records/wiki-page` —
1975    /// still a conforming 3-component `<layer>/<type-folder>/<file>` path.
1976    #[test]
1977    fn shard_path_wiki_page_is_indexable_three_component_path() {
1978        let dir = empty_store();
1979        let store = open(&dir);
1980        let p = store
1981            .shard_path_for("wiki-page", &Frontmatter::default(), "renewal-theme")
1982            .unwrap();
1983        // First two components are a layer + a non-empty type-folder segment;
1984        // the file is the third. This is exactly the shape `type_folder_of`
1985        // (`comps.len() >= 3`, `comps[0]` a known layer) requires.
1986        let comps: Vec<&str> = p.iter().filter_map(|c| c.to_str()).collect();
1987        assert_eq!(
1988            comps.len(),
1989            3,
1990            "wiki-page path must be <layer>/<type-folder>/<file>, got {p:?}"
1991        );
1992        assert_eq!(
1993            comps[0], "records",
1994            "first component must be the records layer (wiki-page is now an \
1995             unrecognized type, filed under the records fallback)"
1996        );
1997        assert!(
1998            !comps[1].is_empty() && comps[1] != "renewal-theme.md",
1999            "second component must be a real type-folder, not the file: {p:?}"
2000        );
2001        assert!(
2002            comps[2].ends_with(".md"),
2003            "third component must be the .md file: {p:?}"
2004        );
2005    }
2006
2007    #[test]
2008    fn shard_path_preserves_and_adds_md_extension() {
2009        let dir = empty_store();
2010        let store = open(&dir);
2011        let with = store
2012            .shard_path_for("contact", &Frontmatter::default(), "sarah.md")
2013            .unwrap();
2014        let without = store
2015            .shard_path_for("contact", &Frontmatter::default(), "sarah")
2016            .unwrap();
2017        assert_eq!(with, PathBuf::from("records/contacts/sarah.md"));
2018        assert_eq!(without, PathBuf::from("records/contacts/sarah.md"));
2019    }
2020
2021    #[test]
2022    fn shard_path_errors_when_sharding_type_has_no_date() {
2023        let dir = empty_store();
2024        let store = open(&dir);
2025        // expense shards, but no `date` and no `created` → NoShardDate.
2026        let err = store
2027            .shard_path_for("expense", &Frontmatter::default(), "mystery")
2028            .unwrap_err();
2029        match err {
2030            StoreError::NoShardDate { file } => {
2031                assert_eq!(file, PathBuf::from("records/expenses/mystery.md"));
2032            }
2033            other => panic!("expected NoShardDate, got {other:?}"),
2034        }
2035    }
2036
2037    // ── find_links_to ────────────────────────────────────────────────────────
2038
2039    #[test]
2040    fn find_links_to_matches_all_accepted_spellings() {
2041        let dir = empty_store();
2042        let root = dir.path();
2043        let target = "records/contacts/sarah-chen";
2044
2045        // Plain link.
2046        write(
2047            root,
2048            "wiki/people/sarah.md",
2049            &format!("---\ntype: wiki-page\nsummary: s\n---\nSee [[{target}]].\n"),
2050        );
2051        // Link with display text.
2052        write(
2053            root,
2054            "records/meetings/2026/05/m.md",
2055            &format!("---\ntype: meeting\nsummary: s\n---\nWith [[{target}|Sarah]].\n"),
2056        );
2057        // Link with .md extension (accepted, warned by validate).
2058        write(
2059            root,
2060            "wiki/themes/t.md",
2061            &format!("---\ntype: wiki-page\nsummary: s\n---\n[[{target}.md]]\n"),
2062        );
2063        // A catalog/index file also contains the link literally — included.
2064        write(
2065            root,
2066            "records/contacts/index.md",
2067            &format!("---\ntype: index\n---\n- [[{target}]] — Sarah\n"),
2068        );
2069        // No link to the target.
2070        write(
2071            root,
2072            "wiki/people/elena.md",
2073            "---\ntype: wiki-page\nsummary: s\n---\nNo links here.\n",
2074        );
2075        // Short-form link must NOT match the full-path target.
2076        write(
2077            root,
2078            "wiki/people/bob.md",
2079            "---\ntype: wiki-page\nsummary: s\n---\n[[sarah-chen]]\n",
2080        );
2081        // A longer path that merely starts with the target must NOT match
2082        // (boundary correctness): target `sarah-chen` vs `sarah-chen-jr`.
2083        write(
2084            root,
2085            "wiki/people/jr.md",
2086            &format!("---\ntype: wiki-page\nsummary: s\n---\n[[{target}-jr]]\n"),
2087        );
2088
2089        let store = open(&dir);
2090        let got = rels(&store.find_links_to(Path::new(target)).unwrap());
2091        assert_eq!(
2092            got,
2093            vec![
2094                "records/contacts/index.md".to_string(),
2095                "records/meetings/2026/05/m.md".to_string(),
2096                "wiki/people/sarah.md".to_string(),
2097                "wiki/themes/t.md".to_string(),
2098            ]
2099        );
2100    }
2101
2102    #[test]
2103    fn find_links_to_distinguishes_sibling_paths() {
2104        // Two contacts whose paths share a prefix; a link to one must not be
2105        // reported as a link to the other.
2106        let dir = empty_store();
2107        let root = dir.path();
2108        write(
2109            root,
2110            "wiki/a.md",
2111            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah]]\n",
2112        );
2113        write(
2114            root,
2115            "wiki/b.md",
2116            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
2117        );
2118        let store = open(&dir);
2119
2120        assert_eq!(
2121            rels(
2122                &store
2123                    .find_links_to(Path::new("records/contacts/sarah"))
2124                    .unwrap()
2125            ),
2126            vec!["wiki/a.md".to_string()]
2127        );
2128        assert_eq!(
2129            rels(
2130                &store
2131                    .find_links_to(Path::new("records/contacts/sarah-chen"))
2132                    .unwrap()
2133            ),
2134            vec!["wiki/b.md".to_string()]
2135        );
2136    }
2137
2138    #[test]
2139    fn regression_find_links_to_tolerates_invalid_utf8_on_a_matched_line() {
2140        // Regression: a `.md` file can carry a stray non-UTF-8 byte on the SAME
2141        // line as a `[[target]]` link (a verbatim-ingested `sources/` artifact,
2142        // e.g. a mis-decoded Latin-1 import). The scan must still report the
2143        // link — `find_links_to` / `find_links_to_any` (and `graph backlinks` +
2144        // the working-set validate incoming-linker pass) must not error out and
2145        // drop the legitimate UTF-8 linkers. The content scan reads the file
2146        // with `String::from_utf8_lossy`, so the invalid byte becomes a
2147        // replacement char and the ASCII `[[target]]` link is still extracted.
2148        let dir = empty_store();
2149        let root = dir.path();
2150        let target = "records/contacts/sarah-chen";
2151
2152        // A clean, fully-UTF-8 linker that MUST be returned regardless.
2153        write(
2154            root,
2155            "wiki/people/clean.md",
2156            &format!("---\ntype: wiki-page\nsummary: s\n---\nSee [[{target}]].\n"),
2157        );
2158
2159        // A linker whose link line ALSO carries a stray 0xFF byte (a mis-decoded
2160        // Latin-1 import). Write raw bytes so the invalid byte survives — a
2161        // `&str` fixture could not express it. The byte-level regex still
2162        // matches `[[target]]` on this line; pre-fix the UTF8 sink aborted here.
2163        let mut bytes: Vec<u8> =
2164            b"---\ntype: email\nsummary: s\n---\nSee [[records/contacts/sarah-chen]] \xFF here\n"
2165                .to_vec();
2166        let dirty_abs = root.join("sources/emails/2026/05/raw.md");
2167        fs::create_dir_all(dirty_abs.parent().unwrap()).unwrap();
2168        fs::write(&dirty_abs, &bytes).unwrap();
2169        // Defensive: confirm the fixture really is invalid UTF-8 (so the test
2170        // exercises the bug, not a coincidentally-valid file).
2171        assert!(
2172            std::str::from_utf8(&bytes).is_err(),
2173            "fixture must contain invalid UTF-8 to exercise the regression"
2174        );
2175        bytes.clear();
2176
2177        let store = open(&dir);
2178        let got = rels(
2179            &store
2180                .find_links_to(Path::new(target))
2181                .expect("a stray non-UTF-8 byte must not abort the backlink scan"),
2182        );
2183        assert_eq!(
2184            got,
2185            vec![
2186                "sources/emails/2026/05/raw.md".to_string(),
2187                "wiki/people/clean.md".to_string(),
2188            ],
2189            "both the clean linker and the one with an invalid byte on the link \
2190             line are reported; the scan degrades, it does not fail"
2191        );
2192    }
2193
2194    // ── find_links_to_any (batch — the O(changed × store) fix) ─────────────────
2195
2196    /// The working-set validate's incoming-linker discovery runs through
2197    /// `find_links_to_any` over the WHOLE changed set in one pass. This pins the
2198    /// batch contract that makes that single-pass behavior correct: the result is
2199    /// the union of incoming linkers across every target, with per-target
2200    /// boundary correctness preserved (no alternation arm bleeds into a
2201    /// prefix-sharing sibling). If a regression reverts the batch finder to a
2202    /// per-object loop, the union below would still hold — but the boundary +
2203    /// union-equivalence assertions are what guard the *correctness* of folding N
2204    /// scans into one regex.
2205    #[test]
2206    fn find_links_to_any_returns_the_union_with_boundary_correctness() {
2207        let dir = empty_store();
2208        let root = dir.path();
2209
2210        // Two distinct targets, each with its own linker.
2211        write(
2212            root,
2213            "wiki/links-sarah.md",
2214            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
2215        );
2216        write(
2217            root,
2218            "wiki/links-acme.md",
2219            "---\ntype: wiki-page\nsummary: s\n---\nDeal with [[records/companies/acme|Acme]].\n",
2220        );
2221        // One file links to BOTH targets — must appear exactly once (deduped),
2222        // proving the per-file early-exit folds multiple-target hits into a
2223        // single result row rather than one row per matched target.
2224        write(
2225            root,
2226            "records/meetings/2026/05/m.md",
2227            "---\ntype: meeting\nsummary: s\n---\n[[records/contacts/sarah-chen]] re \
2228             [[records/companies/acme]]\n",
2229        );
2230        // A prefix-sharing sibling of a target: a link to `sarah-chen-jr` must NOT
2231        // be reported as a link to `sarah-chen` even though the alternation now
2232        // carries `sarah-chen` as one arm.
2233        write(
2234            root,
2235            "wiki/links-jr.md",
2236            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen-jr]]\n",
2237        );
2238        // A file that links to neither requested target.
2239        write(
2240            root,
2241            "wiki/unrelated.md",
2242            "---\ntype: wiki-page\nsummary: s\n---\n[[wiki/themes/spend]]\n",
2243        );
2244
2245        let store = open(&dir);
2246        let targets = vec![
2247            PathBuf::from("records/contacts/sarah-chen"),
2248            PathBuf::from("records/companies/acme"),
2249        ];
2250
2251        let got = rels(&store.find_links_to_any(&targets).unwrap());
2252        assert_eq!(
2253            got,
2254            vec![
2255                "records/meetings/2026/05/m.md".to_string(),
2256                "wiki/links-acme.md".to_string(),
2257                "wiki/links-sarah.md".to_string(),
2258            ],
2259            "batch finder must return the deduped union of linkers across all \
2260             targets, excluding the prefix-sibling and the unrelated file"
2261        );
2262
2263        // Equivalence: the batch result must equal the union of the per-target
2264        // single finder. This is the property the working-set path relies on
2265        // when it folds one-scan-per-object into one scan for the whole set.
2266        let mut union: std::collections::BTreeSet<PathBuf> = std::collections::BTreeSet::new();
2267        for t in &targets {
2268            for linker in store.find_links_to(t).unwrap() {
2269                union.insert(linker);
2270            }
2271        }
2272        assert_eq!(
2273            rels(&union.into_iter().collect::<Vec<_>>()),
2274            got,
2275            "find_links_to_any must equal the union of per-target find_links_to"
2276        );
2277    }
2278
2279    /// An empty target set must scan nothing and find nothing — and crucially
2280    /// must NOT compile to a match-everything empty regex (which would report
2281    /// every `.md` as a linker). This is the empty-working-set fast path the
2282    /// `validate` loop hits when nothing changed.
2283    #[test]
2284    fn find_links_to_any_empty_targets_matches_nothing() {
2285        let dir = empty_store();
2286        let root = dir.path();
2287        write(
2288            root,
2289            "wiki/a.md",
2290            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
2291        );
2292        let store = open(&dir);
2293
2294        assert!(
2295            store.find_links_to_any(&[]).unwrap().is_empty(),
2296            "no targets ⇒ no linkers (an empty pattern must not match every file)"
2297        );
2298        // A set of only empty/non-link targets is likewise a no-op, not a
2299        // match-everything.
2300        assert!(
2301            store
2302                .find_links_to_any(&[PathBuf::from(""), PathBuf::from("./")])
2303                .unwrap()
2304                .is_empty(),
2305            "targets that render to empty link text contribute no alternation arm"
2306        );
2307    }
2308
2309    // ── read_type_index ──────────────────────────────────────────────────────
2310
2311    #[test]
2312    fn read_type_index_parses_records_and_flattens_fields() {
2313        let dir = empty_store();
2314        let root = dir.path();
2315        let jsonl = "\
2316{\"path\":\"records/expenses/2026/05/a.md\",\"type\":\"expense\",\"summary\":\"lunch\",\"tags\":[\"meals\"],\"links\":[\"records/companies/acme\"],\"created\":\"2026-05-01T00:00:00Z\",\"updated\":\"2026-05-01T00:00:00Z\",\"vendor\":\"acme\",\"amount\":42}
2317{\"path\":\"records/expenses/2026/05/b.md\",\"type\":\"expense\",\"summary\":\"taxi\",\"created\":null,\"updated\":null,\"vendor\":\"yellow\"}
2318";
2319        let p = write(root, "records/expenses/index.jsonl", jsonl);
2320        let store = open(&dir);
2321        let recs = store.read_type_index(&store.abs_path(&p)).unwrap();
2322
2323        assert_eq!(recs.len(), 2);
2324        // Sorted by path asc.
2325        assert_eq!(recs[0].path, PathBuf::from("records/expenses/2026/05/a.md"));
2326        assert_eq!(recs[0].type_, "expense");
2327        assert_eq!(recs[0].summary, "lunch");
2328        assert_eq!(recs[0].tags, vec!["meals".to_string()]);
2329        assert_eq!(recs[0].links, vec!["records/companies/acme".to_string()]);
2330        assert!(recs[0].created.is_some());
2331        // Extra (non-typed) frontmatter flattens into `fields`.
2332        assert_eq!(
2333            recs[0].fields.get("vendor"),
2334            Some(&serde_json::json!("acme"))
2335        );
2336        assert_eq!(recs[0].fields.get("amount"), Some(&serde_json::json!(42)));
2337        // Defaults: missing tags/links → empty.
2338        assert!(recs[1].tags.is_empty());
2339        assert!(recs[1].links.is_empty());
2340    }
2341
2342    #[test]
2343    fn read_type_index_last_write_wins_and_skips_blanks() {
2344        let dir = empty_store();
2345        let root = dir.path();
2346        // Same path twice; the second line supersedes the first. A blank line
2347        // in between must be ignored, not error.
2348        let jsonl = "\
2349{\"path\":\"records/contacts/sarah.md\",\"type\":\"contact\",\"summary\":\"old\",\"created\":null,\"updated\":null}
2350
2351{\"path\":\"records/contacts/sarah.md\",\"type\":\"contact\",\"summary\":\"new\",\"created\":null,\"updated\":null}
2352";
2353        let p = write(root, "records/contacts/index.jsonl", jsonl);
2354        let store = open(&dir);
2355        let recs = store.read_type_index(&store.abs_path(&p)).unwrap();
2356        assert_eq!(recs.len(), 1, "duplicate path collapses to one record");
2357        assert_eq!(recs[0].summary, "new", "later line must win");
2358    }
2359
2360    #[test]
2361    fn read_type_index_errors_on_malformed_line() {
2362        let dir = empty_store();
2363        let root = dir.path();
2364        let p = write(root, "records/contacts/index.jsonl", "{not valid json}\n");
2365        let store = open(&dir);
2366        let err = store.read_type_index(&store.abs_path(&p)).unwrap_err();
2367        assert!(matches!(err, StoreError::BadTypeIndex { .. }));
2368    }
2369
2370    // ── find_by_type / find_by_where ─────────────────────────────────────────
2371
2372    fn jsonl_line(path: &str, type_: &str, summary: &str, extra: &str) -> String {
2373        format!(
2374            "{{\"path\":\"{path}\",\"type\":\"{type_}\",\"summary\":\"{summary}\",\"created\":null,\"updated\":null{extra}}}\n"
2375        )
2376    }
2377
2378    #[test]
2379    fn find_by_type_reads_canonical_folder_sidecar() {
2380        let dir = empty_store();
2381        let root = dir.path();
2382        // Canonical folder for `contact` is records/contacts.
2383        write(
2384            root,
2385            "records/contacts/index.jsonl",
2386            &(jsonl_line("records/contacts/sarah.md", "contact", "Sarah", "")
2387                + &jsonl_line("records/contacts/elena.md", "contact", "Elena", "")),
2388        );
2389        // A different type's sidecar must not leak into a contact query.
2390        write(
2391            root,
2392            "records/companies/index.jsonl",
2393            &jsonl_line("records/companies/acme.md", "company", "Acme", ""),
2394        );
2395        let store = open(&dir);
2396        let recs = store.find_by_type("contact").unwrap();
2397        let names: Vec<_> = recs.iter().map(|r| r.summary.clone()).collect();
2398        assert_eq!(names, vec!["Elena".to_string(), "Sarah".to_string()]); // path-sorted
2399        assert!(recs.iter().all(|r| r.type_ == "contact"));
2400    }
2401
2402    #[test]
2403    fn regression_find_by_type_includes_non_canonical_folder_when_canonical_exists() {
2404        // Regression for the silent-incompleteness bug: once the canonical
2405        // type-folder sidecar exists, `find_by_type` used to read ONLY that
2406        // sidecar and drop same-type records filed in a non-canonical folder in
2407        // the SAME layer — so the result flipped to incomplete the moment a
2408        // canonical record was added. The write path actively enables such a
2409        // layout (`records/clients/` for a `contact`, `wiki/<topic>/` for any
2410        // `wiki-page`), so this is a reachable, dedup-breaking omission.
2411        let dir = empty_store();
2412        let root = dir.path();
2413
2414        // CANONICAL folder sidecar exists (`records/contacts/` for `contact`),
2415        // which is exactly the condition that triggered the bug.
2416        write(
2417            root,
2418            "records/contacts/index.jsonl",
2419            &jsonl_line("records/contacts/sarah.md", "contact", "Sarah", ""),
2420        );
2421        // A `contact` filed in a NON-canonical folder within the same (Records)
2422        // layer. Pre-fix this was silently dropped because the canonical
2423        // sidecar existed; it must now come back.
2424        write(
2425            root,
2426            "records/clients/index.jsonl",
2427            &jsonl_line("records/clients/elena.md", "contact", "Elena", ""),
2428        );
2429        // A different type in the same layer must NOT leak in (proves the read
2430        // is type-filtered, not just a blind whole-layer dump).
2431        write(
2432            root,
2433            "records/companies/index.jsonl",
2434            &jsonl_line("records/companies/acme.md", "company", "Acme", ""),
2435        );
2436
2437        let store = open(&dir);
2438        let got: std::collections::BTreeSet<String> = store
2439            .find_by_type("contact")
2440            .unwrap()
2441            .into_iter()
2442            .map(|r| r.path.to_string_lossy().into_owned())
2443            .collect();
2444        assert_eq!(
2445            got,
2446            ["records/clients/elena.md", "records/contacts/sarah.md"]
2447                .into_iter()
2448                .map(String::from)
2449                .collect::<std::collections::BTreeSet<_>>(),
2450            "both the canonical-folder and the non-canonical-folder contact must \
2451             be returned; the company record must be excluded"
2452        );
2453    }
2454
2455    #[test]
2456    fn regression_find_by_type_profile_spans_multiple_topic_folders() {
2457        // Regression for the scoped-backlinks variant of the same bug
2458        // (`graph backlinks --type <conclusion-type>`): a conclusion type like
2459        // `profile` has the canonical fallback folder `records/profile`, but the
2460        // agent may file profiles under ANY records topic folder
2461        // (`records/people/`, `records/clients/`, …). With a
2462        // `records/profile/index.jsonl` present, the old code read only that
2463        // folder and dropped profiles in the other topic folders —
2464        // under-reporting dependents in a blast-radius check. The
2465        // whole-`records/`-layer read must surface all of them.
2466        let dir = empty_store();
2467        let root = dir.path();
2468        write(
2469            root,
2470            "records/profile/index.jsonl",
2471            &jsonl_line("records/profile/billing.md", "profile", "Billing", ""),
2472        );
2473        write(
2474            root,
2475            "records/people/index.jsonl",
2476            &jsonl_line("records/people/sarah-chen.md", "profile", "Sarah Chen", ""),
2477        );
2478        write(
2479            root,
2480            "records/clients/index.jsonl",
2481            &jsonl_line("records/clients/atlas.md", "profile", "Atlas", ""),
2482        );
2483
2484        let store = open(&dir);
2485        let got: std::collections::BTreeSet<String> = store
2486            .find_by_type("profile")
2487            .unwrap()
2488            .into_iter()
2489            .map(|r| r.path.to_string_lossy().into_owned())
2490            .collect();
2491        assert_eq!(
2492            got,
2493            [
2494                "records/clients/atlas.md",
2495                "records/people/sarah-chen.md",
2496                "records/profile/billing.md",
2497            ]
2498            .into_iter()
2499            .map(String::from)
2500            .collect::<std::collections::BTreeSet<_>>(),
2501            "a profile query must return records from every topic folder, not \
2502             just the canonical records/profile/"
2503        );
2504    }
2505
2506    #[test]
2507    fn find_by_type_canonical_absent_falls_back_within_the_layer_only() {
2508        let dir = empty_store();
2509        let root = dir.path();
2510        // A custom `proposal` record filed under a non-canonical folder NAME
2511        // (the natural plural `records/proposals/`) inside the records layer.
2512        // `default_type_folder("proposal")` = `records/proposal` (bare type, no
2513        // pluralization guess), so the canonical sidecar does not exist and
2514        // `find_by_type` falls back. The fallback is bounded to the type's
2515        // layer (records), so this record — same layer, non-canonical folder —
2516        // is still found: completeness within the layer holds.
2517        write(
2518            root,
2519            "records/proposals/index.jsonl",
2520            &jsonl_line("records/proposals/p1.md", "proposal", "Q3 proposal", ""),
2521        );
2522        // A DECOY of the SAME type sitting in a DIFFERENT layer (sources/). The
2523        // old whole-store fallback read every sidecar in the store and would
2524        // have leaked this into the result; the layer-bounded fallback must not.
2525        // It also pins that the fallback is O(entities-in-layer), never O(store).
2526        write(
2527            root,
2528            "sources/proposals/index.jsonl",
2529            &jsonl_line(
2530                "sources/proposals/leak.md",
2531                "proposal",
2532                "cross-layer decoy",
2533                "",
2534            ),
2535        );
2536        let store = open(&dir);
2537        let recs = store.find_by_type("proposal").unwrap();
2538        assert_eq!(
2539            recs.len(),
2540            1,
2541            "only the records-layer proposal, not the sources decoy"
2542        );
2543        assert_eq!(recs[0].summary, "Q3 proposal");
2544        assert_eq!(recs[0].path, PathBuf::from("records/proposals/p1.md"));
2545    }
2546
2547    #[test]
2548    fn find_by_type_canonical_absent_does_not_read_other_layers() {
2549        let dir = empty_store();
2550        let root = dir.path();
2551        // `email`'s canonical folder is `sources/emails` (layer Sources). No
2552        // sidecar there yet, so `find_by_type("email")` falls back — but only
2553        // within the Sources layer. A populated sidecar in the Records layer
2554        // must never be touched: the fallback is layer-bounded, not store-wide.
2555        // Under the old `read_all_type_indexes_in(None)` fallback this records
2556        // sidecar would have been read and filtered (wasted O(store) I/O); now
2557        // it is outside the walk root entirely.
2558        write(
2559            root,
2560            "records/contacts/index.jsonl",
2561            &jsonl_line("records/contacts/sarah.md", "contact", "Sarah", ""),
2562        );
2563        let store = open(&dir);
2564        // No email anywhere ⇒ empty, and the records layer was not in scope.
2565        assert!(store.find_by_type("email").unwrap().is_empty());
2566    }
2567
2568    #[test]
2569    fn find_by_where_matches_typed_columns_and_flat_fields() {
2570        let dir = empty_store();
2571        let root = dir.path();
2572        write(
2573            root,
2574            "records/expenses/index.jsonl",
2575            &(jsonl_line(
2576                "records/expenses/a.md",
2577                "expense",
2578                "lunch",
2579                ",\"vendor\":\"acme\",\"tags\":[\"meals\"]",
2580            ) + &jsonl_line(
2581                "records/expenses/b.md",
2582                "expense",
2583                "taxi",
2584                ",\"vendor\":\"yellow\"",
2585            )),
2586        );
2587        write(
2588            root,
2589            "records/contacts/index.jsonl",
2590            &jsonl_line(
2591                "records/contacts/sarah.md",
2592                "contact",
2593                "Sarah",
2594                ",\"tags\":[\"customer\"]",
2595            ),
2596        );
2597        let store = open(&dir);
2598
2599        // Flat field in `fields`.
2600        let by_vendor = store.find_by_where("vendor", "acme").unwrap();
2601        assert_eq!(by_vendor.len(), 1);
2602        assert_eq!(by_vendor[0].path, PathBuf::from("records/expenses/a.md"));
2603
2604        // Typed column: type (spans both expense records).
2605        assert_eq!(store.find_by_where("type", "expense").unwrap().len(), 2);
2606
2607        // Typed list column: tags membership.
2608        let customers = store.find_by_where("tags", "customer").unwrap();
2609        assert_eq!(customers.len(), 1);
2610        assert_eq!(
2611            customers[0].path,
2612            PathBuf::from("records/contacts/sarah.md")
2613        );
2614
2615        // No match → empty.
2616        assert!(store.find_by_where("vendor", "nobody").unwrap().is_empty());
2617    }
2618
2619    #[test]
2620    fn find_by_where_matches_timestamps_across_rfc3339_spellings() {
2621        let dir = empty_store();
2622        let root = dir.path();
2623        // db.md files most commonly carry the `Z` UTC spelling. The index.jsonl
2624        // serialized from such a file preserves it verbatim.
2625        write(
2626            root,
2627            "records/meetings/index.jsonl",
2628            "{\"path\":\"records/meetings/kickoff.md\",\"type\":\"meeting\",\
2629\"summary\":\"kickoff\",\"created\":\"2026-05-01T00:00:00Z\",\
2630\"updated\":\"2026-05-02T09:30:00-07:00\"}\n",
2631        );
2632        let store = open(&dir);
2633
2634        // The exact value an agent reads out of the file (`Z` form) must match.
2635        let by_z = store
2636            .find_by_where("created", "2026-05-01T00:00:00Z")
2637            .unwrap();
2638        assert_eq!(by_z.len(), 1);
2639        assert_eq!(by_z[0].path, PathBuf::from("records/meetings/kickoff.md"));
2640
2641        // The equivalent explicit-offset spelling of the same instant matches too.
2642        assert_eq!(
2643            store
2644                .find_by_where("created", "2026-05-01T00:00:00+00:00")
2645                .unwrap()
2646                .len(),
2647            1
2648        );
2649
2650        // A non-UTC stored value matches both its own offset spelling and the
2651        // same instant expressed as `Z` (instant comparison, not string compare).
2652        assert_eq!(
2653            store
2654                .find_by_where("updated", "2026-05-02T09:30:00-07:00")
2655                .unwrap()
2656                .len(),
2657            1
2658        );
2659        assert_eq!(
2660            store
2661                .find_by_where("updated", "2026-05-02T16:30:00Z")
2662                .unwrap()
2663                .len(),
2664            1
2665        );
2666
2667        // A different instant does not match.
2668        assert!(store
2669            .find_by_where("created", "2026-05-01T00:00:01Z")
2670            .unwrap()
2671            .is_empty());
2672        // A non-RFC3339 query value never matches a real timestamp.
2673        assert!(store
2674            .find_by_where("created", "2026-05-01")
2675            .unwrap()
2676            .is_empty());
2677    }
2678
2679    #[test]
2680    fn find_by_where_in_layer_reads_only_that_layers_sidecars() {
2681        // The O(entities-in-layer) contract: a layer-scoped where read must walk
2682        // ONLY the named layer's subtree. Proven structurally — a *malformed*
2683        // sidecar in another layer would make `read_type_index` error if it were
2684        // read, so a scoped read that succeeds (and excludes that record) is
2685        // proof the other layer's I/O never happened.
2686        let dir = empty_store();
2687        let root = dir.path();
2688        write(
2689            root,
2690            "records/companies/index.jsonl",
2691            &jsonl_line(
2692                "records/companies/acme.md",
2693                "company",
2694                "Acme",
2695                ",\"domain\":\"acme.com\"",
2696            ),
2697        );
2698        // Same field/value in the sources layer — but the sidecar is corrupt.
2699        write(
2700            root,
2701            "sources/emails/index.jsonl",
2702            "{ this is not valid json and would error if read }\n",
2703        );
2704        let store = open(&dir);
2705
2706        // Scoped to records: the corrupt sources sidecar is out of scope, so the
2707        // read succeeds and returns only the records-layer match.
2708        let in_records = store
2709            .find_by_where_in("domain", "acme.com", Some(Layer::Records))
2710            .expect("a records-scoped read must not touch the sources sidecar");
2711        assert_eq!(
2712            rels(
2713                &in_records
2714                    .iter()
2715                    .map(|r| r.path.clone())
2716                    .collect::<Vec<_>>()
2717            ),
2718            vec!["records/companies/acme.md".to_string()]
2719        );
2720
2721        // The store-wide read DOES reach the corrupt sidecar and surfaces it as
2722        // a parse error — confirming the corrupt file is genuinely in the tree
2723        // and that only the layer scope spares it.
2724        let store_wide = store.find_by_where("domain", "acme.com");
2725        assert!(
2726            matches!(store_wide, Err(StoreError::BadTypeIndex { .. })),
2727            "unscoped read walks every layer and hits the corrupt sidecar"
2728        );
2729
2730        // Scoping to the layer that holds only the corrupt sidecar still errors
2731        // (the scope includes it), proving the scope is a real subtree bound and
2732        // not a silent "skip anything that fails".
2733        let in_sources = store.find_by_where_in("domain", "acme.com", Some(Layer::Sources));
2734        assert!(matches!(in_sources, Err(StoreError::BadTypeIndex { .. })));
2735    }
2736
2737    #[test]
2738    fn find_by_where_in_missing_layer_is_empty_not_an_error() {
2739        // A layer-scoped read over a layer folder that does not exist yet must
2740        // return empty (mirrors `walk_layer`'s missing-dir guard), never a walk
2741        // error from `ignore` over a nonexistent path.
2742        let dir = empty_store();
2743        let root = dir.path();
2744        write(
2745            root,
2746            "records/contacts/index.jsonl",
2747            &jsonl_line(
2748                "records/contacts/sarah.md",
2749                "contact",
2750                "Sarah",
2751                ",\"city\":\"denver\"",
2752            ),
2753        );
2754        let store = open(&dir);
2755
2756        // `sources/` was never created.
2757        let in_sources = store
2758            .find_by_where_in("city", "denver", Some(Layer::Sources))
2759            .expect("missing layer subtree is empty, not an error");
2760        assert!(in_sources.is_empty());
2761
2762        // Same query scoped to the layer that has the record still finds it.
2763        let in_records = store
2764            .find_by_where_in("city", "denver", Some(Layer::Records))
2765            .unwrap();
2766        assert_eq!(in_records.len(), 1);
2767    }
2768
2769    // ── abs_path / rel_path ──────────────────────────────────────────────────
2770
2771    #[test]
2772    fn abs_and_rel_path_roundtrip() {
2773        let dir = empty_store();
2774        let store = open(&dir);
2775        let rel = Path::new("records/contacts/sarah.md");
2776        let abs = store.abs_path(rel);
2777        assert_eq!(abs, dir.path().join(rel));
2778        assert_eq!(store.rel_path(&abs).as_deref(), Some(rel));
2779
2780        // An absolute path is passed through unchanged by abs_path.
2781        assert_eq!(store.abs_path(&abs), abs);
2782
2783        // A path outside the store has no store-relative form.
2784        assert_eq!(store.rel_path(Path::new("/somewhere/else.md")), None);
2785    }
2786
2787    // ── infer_type_from_path (inverse of default_type_folder) ────────────────
2788
2789    #[test]
2790    fn infer_type_maps_every_recognized_folder_back_to_its_type() {
2791        let cases = [
2792            ("sources/emails/x.md", "email"),
2793            ("sources/transcripts/x.md", "transcript"),
2794            ("sources/docs/x.md", "pdf-source"),
2795            ("sources/notes/x.md", "note"),
2796            ("records/contacts/x.md", "contact"),
2797            ("records/companies/x.md", "company"),
2798            ("records/expenses/x.md", "expense"),
2799            ("records/meetings/x.md", "meeting"),
2800            ("records/decisions/x.md", "decision"),
2801            ("records/invoices/x.md", "invoice"),
2802        ];
2803        for (path, expected) in cases {
2804            assert_eq!(
2805                infer_type_from_path(Path::new(path)).as_deref(),
2806                Some(expected),
2807                "path {path} should infer type {expected}"
2808            );
2809        }
2810    }
2811
2812    #[test]
2813    fn infer_type_round_trips_with_default_type_folder() {
2814        // The canonical invariant: inference is the inverse of the forward map.
2815        // Every recognized type, routed through `default_type_folder` and then
2816        // back through `infer_type_from_path`, must return the original type.
2817        // `wiki-page` is the one many-to-one case (every topic folder maps back
2818        // to `wiki-page`), so its forward folder still round-trips.
2819        let recognized = [
2820            "email",
2821            "transcript",
2822            "pdf-source",
2823            "contact",
2824            "company",
2825            "expense",
2826            "meeting",
2827            "decision",
2828            "invoice",
2829            "wiki-page",
2830        ];
2831        for type_ in recognized {
2832            let folder = default_type_folder(type_);
2833            let file = folder.join("x.md");
2834            assert_eq!(
2835                infer_type_from_path(&file).as_deref(),
2836                Some(type_),
2837                "recognized type {type_} (folder {folder:?}) must round-trip"
2838            );
2839        }
2840    }
2841
2842    #[test]
2843    fn infer_type_round_trips_custom_types_verbatim_no_singularization() {
2844        // Regression guard for the CLI/core divergence: `default_type_folder`'s
2845        // unrecognized fallback is the BARE type name (`task → records/task`,
2846        // `tasks → records/tasks`). Inference must NOT singularize, or a custom
2847        // type would not round-trip (e.g. `records/tasks` → `task` would clash
2848        // with `default_type_folder("task") → records/task`).
2849        for custom in ["task", "tasks", "playbook", "process", "okrs", "ticket"] {
2850            let folder = default_type_folder(custom);
2851            assert_eq!(folder, PathBuf::from("records").join(custom));
2852            let file = folder.join("x.md");
2853            assert_eq!(
2854                infer_type_from_path(&file).as_deref(),
2855                Some(custom),
2856                "custom type {custom} must round-trip verbatim (no singularization)"
2857            );
2858        }
2859
2860        // The specific case named in the finding: a plural custom folder keeps
2861        // its trailing `s`; it is NOT singularized to `task`.
2862        assert_eq!(
2863            infer_type_from_path(Path::new("records/tasks/x.md")).as_deref(),
2864            Some("tasks"),
2865            "records/tasks must infer `tasks`, not `task`"
2866        );
2867    }
2868
2869    #[test]
2870    fn infer_type_requires_three_component_layer_folder_file_shape() {
2871        // Fewer than 3 components: a file directly under a layer has no
2872        // type-folder, so inference yields None (matches the old CLI contract).
2873        assert_eq!(infer_type_from_path(Path::new("records/x.md")), None);
2874        assert_eq!(infer_type_from_path(Path::new("sources/x.md")), None);
2875        assert_eq!(infer_type_from_path(Path::new("wiki/x.md")), None);
2876        assert_eq!(infer_type_from_path(Path::new("x.md")), None);
2877        // Unknown leading layer is never inferred.
2878        assert_eq!(infer_type_from_path(Path::new("foo/bar/x.md")), None);
2879        // Deeper paths still infer from the first type-folder segment (e.g. a
2880        // sharded record under records/expenses/2026/05/x.md).
2881        assert_eq!(
2882            infer_type_from_path(Path::new("records/expenses/2026/05/x.md")).as_deref(),
2883            Some("expense"),
2884        );
2885    }
2886
2887    // ── ensure_path_within_store (containment) ───────────────────────────────
2888
2889    #[test]
2890    fn ensure_path_within_store_accepts_in_store_and_rejects_escape() {
2891        let dir = tempdir().unwrap();
2892        let root = dir.path();
2893        fs::create_dir_all(root.join("records/contacts")).unwrap();
2894        fs::write(root.join("records/contacts/sarah.md"), "x").unwrap();
2895
2896        // An existing in-store file resolves and is accepted.
2897        let inside = root.join("records/contacts/sarah.md");
2898        let got = ensure_path_within_store(root, &inside).expect("in-store path accepted");
2899        // Canonical, but still under the (canonical) root.
2900        assert!(got.starts_with(root.canonicalize().unwrap()));
2901
2902        // A not-yet-existing in-store leaf is accepted (rename destination).
2903        let new_leaf = root.join("records/contacts/sarah-chen.md");
2904        assert!(
2905            ensure_path_within_store(root, &new_leaf).is_ok(),
2906            "a non-existent in-store leaf must be accepted"
2907        );
2908
2909        // A `..`-escaping path is rejected even though its prefix exists.
2910        let escape = root.join("records/contacts/../../outside/secret.md");
2911        assert!(
2912            ensure_path_within_store(root, &escape).is_err(),
2913            "a `..`-escaping path must be rejected"
2914        );
2915    }
2916
2917    #[test]
2918    fn ensure_path_within_store_rejects_symlink_escape() {
2919        let dir = tempdir().unwrap();
2920        let root = dir.path().join("store");
2921        fs::create_dir_all(&root).unwrap();
2922        let outside_dir = dir.path().join("outside");
2923        fs::create_dir_all(&outside_dir).unwrap();
2924        let secret = outside_dir.join("secret.md");
2925        fs::write(&secret, "TOPSECRET").unwrap();
2926
2927        // A symlink inside the store that points OUTSIDE it must be rejected:
2928        // resolving the symlink lands outside the canonical root.
2929        #[cfg(unix)]
2930        {
2931            use std::os::unix::fs::symlink;
2932            let link = root.join("escape.md");
2933            symlink(&secret, &link).unwrap();
2934            assert!(
2935                ensure_path_within_store(&root, &link).is_err(),
2936                "a symlink resolving outside the store must be rejected"
2937            );
2938        }
2939    }
2940
2941    // ── shared link-edge notion (fence / whitespace / case) ──────────────────
2942
2943    #[test]
2944    fn extract_edge_targets_trims_inner_whitespace() {
2945        // Padded `[[ x ]]` is the same edge as `[[x]]`.
2946        assert_eq!(
2947            extract_edge_targets("See [[ records/contacts/sarah ]] today."),
2948            vec!["records/contacts/sarah".to_string()]
2949        );
2950    }
2951
2952    #[test]
2953    fn extract_edge_targets_skips_fenced_code_blocks() {
2954        // A `[[...]]` inside a ``` fence is a doc example, NOT an edge — matching
2955        // validate's body extractor.
2956        let body = "\
2957Real [[records/contacts/sarah]] link.
2958
2959```markdown
2960[[records/contacts/ghost-example]] is how you link.
2961```
2962
2963After fence [[records/companies/acme]].
2964";
2965        let got = extract_edge_targets(body);
2966        assert_eq!(
2967            got,
2968            vec![
2969                "records/contacts/sarah".to_string(),
2970                "records/companies/acme".to_string(),
2971            ],
2972            "fenced example link must not be an edge"
2973        );
2974    }
2975
2976    #[test]
2977    fn extract_edge_targets_handles_nested_indented_and_long_run_fences() {
2978        // Regression for the naive `starts_with("```")/("~~~")` toggle: a fence
2979        // nested inside another, an over-indented (>3 space) marker, and a
2980        // long-run fence wrapping a shorter inner one must all leave the block's
2981        // links un-extracted (validate treats the whole block as opaque). The
2982        // (char, run-length) tracker keys on the OPENING fence and closes only on
2983        // a matching char with run ≥ the opener.
2984
2985        // (a) A ```` ```` ````-run block (run 4) wrapping a ``` example (run 3).
2986        // The inner ``` does NOT close the outer run-4 fence, so both `[[...]]`
2987        // inside stay fenced.
2988        let nested = "\
2989Doc:
2990
2991````
2992```
2993[[records/contacts/bob]]
2994```
2995still fenced [[records/contacts/bob]]
2996````
2997
2998Real [[records/companies/acme]].
2999";
3000        assert_eq!(
3001            extract_edge_targets(nested),
3002            vec!["records/companies/acme".to_string()],
3003            "a nested ``` inside a ````-run fence must not leak the fenced links"
3004        );
3005
3006        // (b) A `~~~` block containing a ``` line (the standard way to document a
3007        // backtick fence). The inner backtick line must not flip the state.
3008        let tilde_wraps_backtick = "\
3009~~~
3010```
3011[[records/contacts/ghost]]
3012```
3013~~~
3014
3015After [[records/companies/acme]].
3016";
3017        assert_eq!(
3018            extract_edge_targets(tilde_wraps_backtick),
3019            vec!["records/companies/acme".to_string()],
3020            "a ``` line inside a ~~~ block must not invert the fence state"
3021        );
3022
3023        // (c) An over-indented ```` ``` ```` (4 spaces) is NOT a fence; the link
3024        // on the next line is live.
3025        let over_indented = "    ```\nLive [[records/contacts/sarah]].\n";
3026        assert_eq!(
3027            extract_edge_targets(over_indented),
3028            vec!["records/contacts/sarah".to_string()],
3029            "a >3-space-indented ``` is not a fence opener"
3030        );
3031    }
3032
3033    #[test]
3034    fn canonical_link_target_strips_md_dotslash_and_trims() {
3035        assert_eq!(canonical_link_target("  records/x.md  "), "records/x");
3036        assert_eq!(canonical_link_target("./wiki/y"), "wiki/y");
3037        assert_eq!(canonical_link_target("/records/z"), "records/z");
3038    }
3039
3040    #[test]
3041    fn link_edge_key_folds_case_only_on_case_insensitive_fs() {
3042        let a = link_edge_key("records/contacts/Sarah-Chen");
3043        let b = link_edge_key("records/contacts/sarah-chen");
3044        if fs_is_case_insensitive() {
3045            assert_eq!(a, b, "case-insensitive FS must fold the key");
3046        } else {
3047            assert_ne!(a, b, "case-sensitive FS must keep the key case-exact");
3048        }
3049    }
3050
3051    // ── walk follows symlinked content ───────────────────────────────────────
3052
3053    #[cfg(unix)]
3054    #[test]
3055    fn walk_includes_symlinked_content_file_and_symlinked_folder() {
3056        use std::os::unix::fs::symlink;
3057        let dir = empty_store();
3058        let root = dir.path();
3059        // A regular file (control).
3060        write(
3061            root,
3062            "records/contacts/sarah.md",
3063            &content_md("2026-05-01T00:00:00Z"),
3064        );
3065        // A symlinked .md content file inside a real folder.
3066        let external_file = root.join("external-elena.md");
3067        fs::write(&external_file, content_md("2026-05-02T00:00:00Z")).unwrap();
3068        symlink(&external_file, root.join("records/contacts/elena.md")).unwrap();
3069        // A symlinked type folder.
3070        let external_dir = dir.path().join("external-companies");
3071        fs::create_dir_all(&external_dir).unwrap();
3072        fs::write(
3073            external_dir.join("acme.md"),
3074            content_md("2026-05-03T00:00:00Z"),
3075        )
3076        .unwrap();
3077        symlink(&external_dir, root.join("records/companies")).unwrap();
3078
3079        let store = open(&dir);
3080        let got = rels(&store.walk().unwrap());
3081        assert!(
3082            got.contains(&"records/contacts/elena.md".to_string()),
3083            "a symlinked content file must be walked: {got:?}"
3084        );
3085        assert!(
3086            got.contains(&"records/companies/acme.md".to_string()),
3087            "a file inside a symlinked type folder must be walked: {got:?}"
3088        );
3089    }
3090
3091    // ── find_links_to: padded / fenced / case ────────────────────────────────
3092
3093    #[test]
3094    fn find_links_to_matches_whitespace_padded_link() {
3095        let dir = empty_store();
3096        let root = dir.path();
3097        write(
3098            root,
3099            "wiki/people/a.md",
3100            "---\ntype: wiki-page\nsummary: s\n---\nSee [[ records/contacts/sarah ]] today.\n",
3101        );
3102        let store = open(&dir);
3103        let got = rels(
3104            &store
3105                .find_links_to(Path::new("records/contacts/sarah"))
3106                .unwrap(),
3107        );
3108        assert_eq!(
3109            got,
3110            vec!["wiki/people/a.md".to_string()],
3111            "a padded `[[ x ]]` link must be found as a backward edge, matching forwardlinks"
3112        );
3113    }
3114
3115    #[test]
3116    fn find_links_to_ignores_fenced_example_link() {
3117        let dir = empty_store();
3118        let root = dir.path();
3119        write(
3120            root,
3121            "wiki/topics/howto.md",
3122            "---\ntype: wiki-page\nsummary: s\n---\n```markdown\n[[records/contacts/sarah]]\n```\n",
3123        );
3124        let store = open(&dir);
3125        let got = store
3126            .find_links_to(Path::new("records/contacts/sarah"))
3127            .unwrap();
3128        assert!(
3129            got.is_empty(),
3130            "a `[[...]]` only inside a fenced code block is not a backward edge: {got:?}"
3131        );
3132    }
3133
3134    #[cfg(unix)]
3135    #[test]
3136    fn find_links_to_matches_case_variant_on_case_insensitive_fs() {
3137        // Only meaningful on a case-insensitive filesystem; on a case-sensitive
3138        // one the case-variant link is genuinely a different target.
3139        if !fs_is_case_insensitive() {
3140            return;
3141        }
3142        let dir = empty_store();
3143        let root = dir.path();
3144        write(
3145            root,
3146            "wiki/people/bio.md",
3147            "---\ntype: wiki-page\nsummary: s\n---\nSee [[records/contacts/Sarah-Chen]].\n",
3148        );
3149        let store = open(&dir);
3150        let got = rels(
3151            &store
3152                .find_links_to(Path::new("records/contacts/sarah-chen"))
3153                .unwrap(),
3154        );
3155        assert_eq!(
3156            got,
3157            vec!["wiki/people/bio.md".to_string()],
3158            "a case-variant link must be found on a case-insensitive filesystem"
3159        );
3160    }
3161}