Skip to main content

dbmd_core/
store.rs

1//! `store` — walk, locate, and shard a db.md store.
2//!
3//! A db.md store is one directory marked by an uppercase `DB.md` at its root.
//! Every store-walking subcommand enters through [`Store::open_strict`] (or the
//! lenient, validation-oriented [`Store::open`]); a missing `DB.md` is the
//! [`NotAStore`] error (`NOT_A_STORE`). The toolkit never guesses a store root.
7//!
8//! Scale discipline lives here: [`Store::walk`] and the layer/type-folder
9//! walks are **SWEEP** primitives used only by `validate --all`,
10//! `index rebuild`, and `stats`. The interactive loop instead uses
11//! [`Store::find_links_to`] / [`Store::find_links_to_any`] (a single
12//! presence-only content scan) and the `index.jsonl` sidecar readers
13//! ([`Store::find_by_type`] / [`Store::find_by_where`] /
14//! [`Store::read_type_index`]) — never a whole-store parse. The batch
15//! [`Store::find_links_to_any`] is what keeps the working-set validate's
16//! incoming-linker discovery a single store scan rather than one scan per
17//! changed object.
18//!
19//! Link edges are defined once, here, by the shared [`extract_edge_targets`] /
20//! [`canonical_link_target`] / [`link_edge_key`] helpers (fence-aware,
21//! whitespace-trimmed, case-folded to the filesystem), so the forward view
22//! (`graph::forwardlinks`), the backward view ([`Store::find_links_to_any`]),
23//! `rename`, and `validate` all agree on exactly which `[[...]]` is an edge.
24//! [`ensure_path_within_store`] is the within-store containment gate every
25//! caller-influenced path passes through before it is read or traversed.
26
27use std::collections::BTreeMap;
28use std::path::{Path, PathBuf};
29use std::time::{SystemTime, UNIX_EPOCH};
30
31use chrono::{DateTime, Datelike, FixedOffset};
32use ignore::WalkBuilder;
33
34use crate::index::IndexRecord;
35use crate::parser::{parse_db_md, Config, Frontmatter};
36
/// Basenames the content walks skip because they are catalogs, not records.
/// Currently that is only the `index.md` catalog, so a SWEEP over the content
/// layers never mistakes a catalog for a record.
///
/// Only `index.md` is excluded by basename, because the content walks traverse
/// the layer dirs (`sources/`/`records/`/`wiki/`) and `index.md` is the only
/// meta file that appears INSIDE them. The root `DB.md` / `log.md` (and the
/// `log/` archive) live at the store root, outside every layer, so they are
/// never reached by these walks — and a content file that merely happens to be
/// named `DB.md` or `log.md` inside a layer (e.g. `records/docs/DB.md`) is real
/// content the SPEC does NOT reserve at type-folder depth.
const NON_CONTENT_BASENAMES: [&str; 1] = ["index.md"];

/// File name of the type-folder sidecar — the complete machine twin that backs
/// every structured read.
const TYPE_INDEX_FILE: &str = "index.jsonl";
52
/// Returned when a path is opened as a store but has no `DB.md` at its root.
/// Surfaced as the structured code `NOT_A_STORE` with a non-zero exit.
///
/// Produced by [`Store::open`] and [`Store::open_strict`] when
/// [`Store::is_db_md_store`] rejects the path.
#[derive(Debug, thiserror::Error)]
#[error("not a db.md store: {path} has no DB.md")]
pub struct NotAStore {
    /// The path that was inspected (the candidate store root, not the `DB.md`
    /// file itself).
    pub path: PathBuf,
}
61
/// Errors from store-level operations (walk, locate, shard, sidecar read).
#[derive(Debug, thiserror::Error)]
pub enum StoreError {
    /// A sidecar `index.jsonl` could not be read or parsed. Raised by
    /// [`Store::read_type_index`] for both the file read and any non-blank
    /// line that is not a valid record.
    #[error("failed to read type index {path}: {message}")]
    BadTypeIndex {
        /// The sidecar file.
        path: PathBuf,
        /// What went wrong (for a bad line, prefixed with its 1-based number).
        message: String,
    },

    /// A required date field for sharding was absent or unparseable, and there
    /// was no usable fallback. Raised by [`Store::shard_path_in`].
    #[error("cannot compute shard path for {file}: no usable date field")]
    NoShardDate {
        /// The file being placed (its un-sharded path under the type folder).
        file: PathBuf,
    },

    /// A store scan failed to start or run: a directory-walk entry error, or a
    /// file read failure during the incoming-link scan.
    #[error("search failed under {root}: {message}")]
    Search {
        /// The root the scan ran under.
        root: PathBuf,
        /// What went wrong.
        message: String,
    },

    /// An underlying I/O failure (converted automatically via `?`).
    #[error(transparent)]
    Io(#[from] std::io::Error),
}
95
/// The three canonical layers of a db.md store.
///
/// The derived `Ord`/`PartialOrd` follow declaration order
/// (`Sources` < `Records` < `Wiki`), which lets sibling modules key
/// `BTreeMap`s on `Layer` (e.g. `stats::Stats::files_per_layer`) and get the
/// canonical layer order for free.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Layer {
    /// `sources/` — raw evidence; immutable; date-sharded at scale.
    Sources,
    /// `records/` — atomic typed data; entity types flat, event types sharded.
    Records,
    /// `wiki/` — curator-synthesized narrative; flat.
    Wiki,
}

impl Layer {
    /// The folder name this layer occupies on disk: `"sources"`, `"records"`,
    /// or `"wiki"`.
    pub fn dir_name(self) -> &'static str {
        // Exhaustive on purpose: adding a variant must fail to compile here.
        match self {
            Self::Sources => "sources",
            Self::Records => "records",
            Self::Wiki => "wiki",
        }
    }

    /// The layer whose folder name is exactly `name` (case-sensitive), or
    /// `None` when `name` is not a layer folder.
    pub fn from_dir_name(name: &str) -> Option<Self> {
        // Inverse of `dir_name`, derived from it so the two cannot drift.
        Self::all()
            .iter()
            .copied()
            .find(|layer| layer.dir_name() == name)
    }

    /// All layers in canonical (declaration) order.
    pub fn all() -> [Layer; 3] {
        [Self::Sources, Self::Records, Self::Wiki]
    }
}
137
/// An opened db.md store: its root path plus the parsed `DB.md` [`Config`].
///
/// Construct via [`Store::open_strict`] (normal commands) or the lenient
/// [`Store::open`] (validation). Both check the `DB.md` marker first, so
/// downstream code can assume a real store; only `open_strict` guarantees the
/// config was actually read and parsed — `open` falls back to the default
/// config when `DB.md` is unreadable or unparseable.
#[derive(Debug, Clone)]
pub struct Store {
    /// The store root (the directory containing `DB.md`).
    pub root: PathBuf,
    /// The parsed `DB.md` config (agent instructions, policies, schemas).
    pub config: Config,
}
149
150impl Store {
151    /// True if `path` is a db.md store root: an uppercase `DB.md` file exists
152    /// at `path`. On case-sensitive filesystems a lowercase `db.md` must NOT
153    /// count (the lowercase name refers to the project/spec, not the marker).
154    pub fn is_db_md_store(path: &Path) -> bool {
155        // Read the directory and match the *stored* filename byte-for-byte.
156        // `path.join("DB.md").exists()` would lie on a case-insensitive
157        // filesystem (macOS default), where a lowercase `db.md` answers a
158        // `DB.md` probe. `read_dir` returns the real on-disk name, so the
159        // exact-match check is correct on both case-sensitive (Linux) and
160        // case-insensitive filesystems.
161        let entries = match std::fs::read_dir(path) {
162            Ok(entries) => entries,
163            Err(_) => return false,
164        };
165        for entry in entries.flatten() {
166            if entry.file_name() == "DB.md" {
167                // A directory literally named `DB.md` is not the marker.
168                match entry.file_type() {
169                    Ok(ft) if ft.is_dir() => return false,
170                    Ok(_) => return true,
171                    Err(_) => return false,
172                }
173            }
174        }
175        false
176    }
177
178    /// Open `path` as a db.md store and require `DB.md` to be readable and
179    /// parseable. Normal commands should enter through this strict gate so a
180    /// damaged config cannot silently disable schema or policy rules.
181    pub fn open_strict(path: &Path) -> crate::Result<Store> {
182        if !Store::is_db_md_store(path) {
183            return Err(NotAStore {
184                path: path.to_path_buf(),
185            }
186            .into());
187        }
188        let db_md = path.join("DB.md");
189        let text = std::fs::read_to_string(&db_md)?;
190        let config = parse_db_md(&text, &db_md)?;
191        Ok(Store {
192            root: path.to_path_buf(),
193            config,
194        })
195    }
196
197    /// Open `path` as a db.md store: confirm the `DB.md` marker (else
198    /// [`NotAStore`]) and parse the `DB.md` config when possible. This is the
199    /// lenient validation-oriented open path: a damaged `DB.md` still marks the
200    /// directory as a store so `dbmd validate` can report the config error as an
201    /// issue. Normal CLI commands should use [`Store::open_strict`] instead.
202    pub fn open(path: &Path) -> Result<Store, NotAStore> {
203        if !Store::is_db_md_store(path) {
204            return Err(NotAStore {
205                path: path.to_path_buf(),
206            });
207        }
208        let db_md = path.join("DB.md");
209        // The marker exists; parse its config. A read or parse failure leaves
210        // the store openable with default config rather than masquerading as
211        // NOT_A_STORE — the marker is present, so this *is* a store; a damaged
212        // DB.md is `dbmd validate`'s job to report, not `open`'s.
213        let config = match std::fs::read_to_string(&db_md) {
214            Ok(text) => parse_db_md(&text, &db_md).unwrap_or_default(),
215            Err(_) => Config::default(),
216        };
217        Ok(Store {
218            root: path.to_path_buf(),
219            config,
220        })
221    }
222
223    /// **SWEEP.** Recursively iterate every `.md` content file across
224    /// `sources/`, `records/`, and `wiki/`, skipping hidden dirs and `log/`.
225    /// Used only by `validate --all`, `index rebuild`, and `stats` — never on
226    /// the interactive loop.
227    pub fn walk(&self) -> Result<Vec<PathBuf>, StoreError> {
228        // Only the three content layers — never root meta files (`DB.md`,
229        // `index.md`, `log.md`) and never `log/`, which live at root and are
230        // outside every layer dir.
231        let mut out = Vec::new();
232        for layer in Layer::all() {
233            out.extend(self.walk_layer(layer)?);
234        }
235        out.sort();
236        Ok(out)
237    }
238
239    /// **SWEEP.** Like [`Store::walk`] but scoped to a single layer.
240    pub fn walk_layer(&self, layer: Layer) -> Result<Vec<PathBuf>, StoreError> {
241        let layer_root = self.root.join(layer.dir_name());
242        if !layer_root.is_dir() {
243            return Ok(Vec::new());
244        }
245        self.walk_content_md(&layer_root)
246    }
247
248    /// Enumerate every `.md` file in a single type-folder, **recursing through
249    /// its date-shards** (`sources/emails/**/*.md`). The unit the index builder
250    /// and per-folder rebuild operate on. SWEEP-class (scoped to one folder).
251    pub fn walk_type_folder(&self, type_folder: &Path) -> Result<Vec<PathBuf>, StoreError> {
252        let abs = self.resolve_under_root(type_folder);
253        if !abs.is_dir() {
254            return Ok(Vec::new());
255        }
256        self.walk_content_md(&abs)
257    }
258
259    /// The ≤`n` most-recent files in a type-folder by frontmatter `updated`
260    /// (descending), ties broken by store-relative path (ascending) — a total
261    /// order, so write-through and rebuild never disagree on #500 vs #501.
262    ///
263    /// Reads `updated` across the folder's shards — a SWEEP cost absorbed into
264    /// `index rebuild`. The write-through path never calls this. The
265    /// cap-selection primitive for the 500-entry `index.md` browse view.
266    pub fn recent_in_type_folder(
267        &self,
268        type_folder: &Path,
269        n: usize,
270    ) -> Result<Vec<PathBuf>, StoreError> {
271        let files = self.walk_type_folder(type_folder)?;
272        // (updated, rel-path) for each file. Files missing/unparseable
273        // `updated` sort *after* dated ones (None last), then by path — so they
274        // are deterministically the lowest-priority candidates for the cap, not
275        // dropped silently. The total order (updated desc, path asc) is what
276        // keeps write-through and rebuild agreeing on #500 vs #501.
277        let mut keyed: Vec<(Option<DateTime<FixedOffset>>, PathBuf)> = files
278            .into_iter()
279            .map(|rel| {
280                let updated = self.read_updated(&self.abs_path(&rel));
281                (updated, rel)
282            })
283            .collect();
284        keyed.sort_by(|a, b| {
285            // `updated` descending: newest first. `None` is treated as the
286            // oldest possible, so dated files always win a cap slot over
287            // undated ones.
288            let by_updated = b.0.cmp(&a.0);
289            by_updated.then_with(|| a.1.cmp(&b.1))
290        });
291        keyed.truncate(n);
292        Ok(keyed.into_iter().map(|(_, rel)| rel).collect())
293    }
294
295    /// The shard/flat predicate: true if the type date-shards, false if it
296    /// stays flat. True for source types and event record types
297    /// (`expense`/`invoice`/`meeting` + custom `order`/`ticket`/`transaction`),
298    /// or when `DB.md ## Schemas` declares `shard: by-date`. False for
299    /// dedup-bounded entity types (`contact`/`company`/`decision`) and `wiki/`.
300    pub fn type_shards(&self, type_: &str) -> bool {
301        // A `DB.md ## Schemas` `### <type>` block with a `shard:` directive is
302        // authoritative — it is the v0.2 generic-model way to declare sharding,
303        // so it overrides the built-in default below (in either direction).
304        if let Some(shard) = self.config.schemas.get(type_).and_then(|s| s.shard) {
305            return shard;
306        }
307        // Built-in default for the example types. Sharding is a property of the
308        // *type*:
309        //  - source types carry a primary date field and shard;
310        //  - event record types track business volume and shard;
311        //  - dedup-bounded entity types and curation-bounded wiki stay flat.
312        // Any type can override this via a `shard:` directive (above).
313        matches!(
314            type_,
315            // source types
316            "email" | "transcript" | "pdf-source"
317            // event record types (canonical)
318            | "expense" | "invoice" | "meeting"
319            // event record types (recognized custom, per the plan)
320            | "order" | "ticket" | "transaction"
321        )
322    }
323
324    /// Compute the canonical write path for a new file. For a sharding type
325    /// (per [`Store::type_shards`]) insert `<YYYY>/<MM>/` from the type's
326    /// primary date field (`email.date`, `expense.date`, … fallback `created`)
327    /// under the type folder; flat types and `wiki/` get no shard segment.
328    /// Deterministic + stable: same input → same path, so a record never moves
329    /// once written.
330    pub fn shard_path_for(
331        &self,
332        type_: &str,
333        frontmatter: &Frontmatter,
334        name: &str,
335    ) -> Result<PathBuf, StoreError> {
336        self.shard_path_in(&default_type_folder(type_), type_, frontmatter, name)
337    }
338
339    /// Like [`Store::shard_path_for`], but compute the path under an explicit,
340    /// caller-resolved type-folder rather than the canonical default. This lets a
341    /// write surface honour an agent-supplied conforming sub-folder — e.g.
342    /// `wiki/projects/`, `wiki/people/`, `wiki/synthesis/` (the SPEC files a
343    /// `wiki-page` under `wiki/<topic>/`, i.e. ANY topic sub-folder, not only the
344    /// `wiki/topics` default) — while still applying date-sharding for sharding
345    /// types. The folder must be a conforming `<layer>/<type-folder>` (2
346    /// components, recognized layer); the caller is responsible for that (see the
347    /// CLI's `resolve_write_path`), so it is taken as given here.
348    ///
349    /// Sharding is still a property of the *type*: a sharding type gets the
350    /// `<YYYY>/<MM>` segment under `folder`; a flat type lands directly in it.
351    pub fn shard_path_in(
352        &self,
353        folder: &Path,
354        type_: &str,
355        frontmatter: &Frontmatter,
356        name: &str,
357    ) -> Result<PathBuf, StoreError> {
358        let folder = folder.to_path_buf();
359        let filename = ensure_md_extension(name);
360
361        if !self.type_shards(type_) {
362            // Flat type (entity records, wiki, decisions): no shard segment.
363            return Ok(folder.join(filename));
364        }
365
366        // Sharding type: derive <YYYY>/<MM> from the primary date field, with
367        // `created` as the universal fallback. Reading the public `Frontmatter`
368        // fields directly (typed `created`/`updated` + raw `extra`) avoids the
369        // not-yet-implemented `Frontmatter::get`/`parse` and keeps this pure.
370        let (year, month) = self
371            .primary_shard_segment(type_, frontmatter)
372            .ok_or_else(|| StoreError::NoShardDate {
373                file: folder.join(&filename),
374            })?;
375
376        Ok(folder.join(year).join(month).join(filename))
377    }
378
379    /// Find files with an incoming wiki-link to `target` via a **single
380    /// presence-only content scan** for an edge to `target` across all layers,
381    /// using the shared fence-aware/whitespace-trimmed/case-folded edge notion
382    /// ([`extract_edge_targets`]). Loop-fast; no whole-graph build. Returns
383    /// store-relative paths.
384    pub fn find_links_to(&self, target: &Path) -> Result<Vec<PathBuf>, StoreError> {
385        // A single target is just the degenerate batch case — one key, one store
386        // scan. Routing through `find_links_to_any` keeps the
387        // pattern construction and the scan loop in exactly one place. The
388        // batch API takes `&[PathBuf]`, so the one-element slice is owned (a
389        // single alloc on this single-target convenience path; the batch path
390        // validate.rs rides is untouched).
391        self.find_links_to_any(&[target.to_path_buf()])
392    }
393
394    /// Find every file with an incoming wiki-link to **any** of `targets`, in a
395    /// **single content pass** over the store (one `.md` walk, one presence-only
396    /// edge scan per file). This is the batch incoming-linker finder the
397    /// working-set [`crate::validate::validate_working_set`] sits on: it must find
398    /// the linkers for the *whole* changed set without paying a full store read
399    /// per changed object. Cost is therefore one store scan (O(store)), NOT
400    /// `targets.len() × store` — calling [`find_links_to`](Self::find_links_to)
401    /// in a loop would reread every `.md` once per target and is the exact
402    /// `O(changed × store)` blow-up this method exists to prevent. Returns
403    /// store-relative paths (deduped, sorted).
404    ///
405    /// **One edge notion with `forwardlinks`/`rename`/`validate`.** A file links
406    /// to a target iff [`extract_edge_targets`] (fence-aware, whitespace-trimmed)
407    /// of its content yields a target whose [`link_edge_key`] equals the target's
408    /// — the *same* definition the forward view and the rename rewriter use. The
409    /// previous implementation used a literal-adjacency ripgrep regex that (a)
410    /// matched `[[...]]` text inside fenced code examples (which validate treats
411    /// as non-edges), (b) missed inner-whitespace padding (`[[ x ]]`), and (c)
412    /// compared case-sensitively even where the filesystem resolves links
413    /// case-insensitively — so backlinks/links/rename silently disagreed with
414    /// forwardlinks and validate. Reading content and routing through the shared
415    /// extractor removes all three divergences.
416    ///
417    /// Why content scan and not the sidecar `links` field: the sidecar projects
418    /// only the frontmatter `links:` array, so it misses edges written in the
419    /// body or in typed fields (`company: [[…]]`). Finding an incoming link to an
420    /// arbitrary path therefore requires reading file content.
421    pub fn find_links_to_any(&self, targets: &[PathBuf]) -> Result<Vec<PathBuf>, StoreError> {
422        // Build the set of comparison keys for the requested targets, in the
423        // canonical (case-folded where the filesystem is case-insensitive) form
424        // the edge extractor emits. An empty key (a target that renders to no
425        // link text, e.g. `""` or `"./"`) contributes nothing — and crucially the
426        // empty set short-circuits below so we never report every file.
427        let want: std::collections::HashSet<String> = targets
428            .iter()
429            .filter_map(|t| {
430                let canonical = canonical_link_target(&t.to_string_lossy());
431                if canonical.is_empty() {
432                    None
433                } else {
434                    Some(link_edge_key(&canonical))
435                }
436            })
437            .collect();
438        if want.is_empty() {
439            return Ok(Vec::new());
440        }
441
442        let mut hits = std::collections::BTreeSet::new();
443        // Scan every `.md` file in the store (skip hidden + `log/`), including
444        // `index.md` catalogs — an incoming reference is wherever the link text
445        // lives; the caller decides relevance. ONE walk for the whole target set;
446        // per file we stop at the first matching edge (presence is all we need),
447        // so a file that links to several targets is read once, not once per
448        // target.
449        for rel in self.walk_all_md()? {
450            let abs = self.abs_path(&rel);
451            // Read lossily: a `.md` verbatim-ingested into `sources/` can carry a
452            // stray non-UTF-8 byte (a mis-decoded Latin-1 import). Decoding
453            // lossily substitutes replacement characters instead of erroring, so
454            // one bad byte on a link-bearing line no longer aborts the whole
455            // store scan (the historical `UTF8`-sink failure). The link syntax is
456            // ASCII, so a replacement char elsewhere on the line never hides a
457            // `[[...]]`. A read error (not a decode error) is genuine I/O trouble
458            // and propagates.
459            let bytes = match std::fs::read(&abs) {
460                Ok(b) => b,
461                Err(e) => {
462                    return Err(StoreError::Search {
463                        root: self.root.clone(),
464                        message: format!("read failed in {}: {e}", abs.display()),
465                    })
466                }
467            };
468            let text = String::from_utf8_lossy(&bytes);
469            for target in extract_edge_targets(&text) {
470                if want.contains(&link_edge_key(&target)) {
471                    hits.insert(rel);
472                    break;
473                }
474            }
475        }
476        Ok(hits.into_iter().collect())
477    }
478
479    /// Candidate set for a `type` query: read every type-folder `index.jsonl`
480    /// sidecar in the type's single layer and return the records of that
481    /// `type`. Complete and cold-cache-proof — NOT a walk-and-parse or a
482    /// frontmatter ripgrep scan, and **never a store-wide read**.
483    ///
484    /// The read is bounded to the type's one layer subtree
485    /// (O(entities-in-layer)): a type lives in exactly one layer, and
486    /// `default_type_folder` always encodes it (recognized → its SPEC layer;
487    /// unrecognized → `records/`), so the walk never fans out across every
488    /// sidecar in the store and stays inside the interactive loop's
489    /// O(entities) contract.
490    ///
491    /// The whole-layer read — rather than reading only the type's canonical
492    /// folder sidecar when it happens to exist — is what makes the result
493    /// *complete*. A single `type` can legitimately be filed across several
494    /// folders within its layer: `wiki-page` under `wiki/<topic>/` for any
495    /// topic (SPEC), or a `contact` filed in `records/clients/` alongside the
496    /// canonical `records/contacts/`. The previous code read only the
497    /// canonical-guess sidecar whenever it was a file, which silently dropped
498    /// those non-canonical records the moment the canonical sidecar existed —
499    /// returning an incomplete set, and a *different* set as the store grew
500    /// (the omission flipped on once one canonical record was added). That
501    /// broke the dedup/enumeration premise this primitive backs and disagreed
502    /// with `find_by_where_in`, which already walks the whole layer. Filtering
503    /// the layer read by `type` keeps the result complete regardless of how the
504    /// type's records are foldered.
505    pub fn find_by_type(&self, type_: &str) -> Result<Vec<IndexRecord>, StoreError> {
506        let canonical_folder = default_type_folder(type_);
507        let records = self.read_all_type_indexes_in(layer_of_folder(&canonical_folder))?;
508        Ok(records.into_iter().filter(|r| r.type_ == type_).collect())
509    }
510
511    /// Candidate set for a `key=value` frontmatter query, **store-wide**: read
512    /// every type-folder `index.jsonl` sidecar and filter their records. The
513    /// unscoped pre-write dedup primitive; prefer [`Store::find_by_where_in`]
514    /// with a layer scope to stay O(entities-in-layer) on the interactive loop.
515    pub fn find_by_where(&self, key: &str, value: &str) -> Result<Vec<IndexRecord>, StoreError> {
516        self.find_by_where_in(key, value, None)
517    }
518
519    /// Candidate set for a `key=value` frontmatter query, **scoped to one
520    /// layer** when `layer` is `Some`: the sidecar walk is confined to that
521    /// layer's subtree (`<root>/<layer>/`), so the I/O is O(entities-in-layer),
522    /// not O(store records). `None` keeps the store-wide read.
523    ///
524    /// This is what makes `--in <layer>` an I/O scope, not just a result
525    /// filter: a `--where`-only query (no `--type`) used to read every sidecar
526    /// in the store and narrow by layer in memory, breaking the O(entities)
527    /// contract the interactive loop depends on. With a layer in hand we walk
528    /// only that layer's sidecars.
529    pub fn find_by_where_in(
530        &self,
531        key: &str,
532        value: &str,
533        layer: Option<Layer>,
534    ) -> Result<Vec<IndexRecord>, StoreError> {
535        // A `key=value` query can target any frontmatter field across any type,
536        // so within the chosen subtree we still read every type-folder sidecar
537        // and filter. The layer (when given) bounds *which* subtree, turning a
538        // whole-store walk into a single-layer walk.
539        let records = self.read_all_type_indexes_in(layer)?;
540        Ok(records
541            .into_iter()
542            .filter(|r| record_matches_field(r, key, value))
543            .collect())
544    }
545
546    /// Every record across the type-folder `index.jsonl` sidecars, scoped to one
547    /// layer when `layer` is `Some` (the walk is confined to `<root>/<layer>/`)
548    /// else store-wide. Sequential, complete sidecar reads — never a
549    /// walk-and-parse of the content tree.
550    ///
551    /// This is the unfiltered sidecar-enumeration primitive the relationship
552    /// loop sits on: [`crate::graph::backlinks_filtered`] uses it to bound its
553    /// candidate set to the relevant layer (or the whole store) without opening
554    /// the content tree, then confirms each candidate's edge by parsing the file.
555    pub fn sidecar_records(&self, layer: Option<Layer>) -> Result<Vec<IndexRecord>, StoreError> {
556        self.read_all_type_indexes_in(layer)
557    }
558
559    /// Parse a type-folder's `index.jsonl` into [`IndexRecord`]s, applying
560    /// last-write-wins by `path` over any un-compacted lines. The sidecar-read
561    /// primitive every structured query sits on.
562    pub fn read_type_index(&self, index_jsonl: &Path) -> Result<Vec<IndexRecord>, StoreError> {
563        let text = std::fs::read_to_string(index_jsonl).map_err(|e| StoreError::BadTypeIndex {
564            path: index_jsonl.to_path_buf(),
565            message: e.to_string(),
566        })?;
567
568        // Last-write-wins by `path` over un-compacted lines: a later line for
569        // the same path supersedes an earlier one (the jsonl is append-mostly
570        // and only compacted on rebuild). Blank lines are skipped; a non-blank
571        // line that is not a valid IndexRecord is a hard parse error.
572        let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
573        for (i, line) in text.lines().enumerate() {
574            let trimmed = line.trim();
575            if trimmed.is_empty() {
576                continue;
577            }
578            let record: IndexRecord =
579                serde_json::from_str(trimmed).map_err(|e| StoreError::BadTypeIndex {
580                    path: index_jsonl.to_path_buf(),
581                    message: format!("line {}: {e}", i + 1),
582                })?;
583            by_path.insert(record.path.clone(), record);
584        }
585        // BTreeMap keyed by path → records emerge sorted by path ascending,
586        // a deterministic order independent of line order in the file.
587        Ok(by_path.into_values().collect())
588    }
589
590    /// Resolve a store-relative path to its absolute on-disk path under
591    /// [`root`](Store::root).
592    pub fn abs_path(&self, store_relative: &Path) -> PathBuf {
593        // `Path::join` returns `store_relative` unchanged if it is already
594        // absolute, so passing an absolute path through is a no-op.
595        self.root.join(store_relative)
596    }
597
598    /// Convert an absolute path under the store into its store-relative form.
599    pub fn rel_path(&self, abs: &Path) -> Option<PathBuf> {
600        abs.strip_prefix(&self.root).ok().map(|p| p.to_path_buf())
601    }
602
603    // ── Private helpers ─────────────────────────────────────────────────────
604
605    /// Resolve a caller-supplied folder path (store-relative or absolute) to an
606    /// absolute path under the store root.
607    fn resolve_under_root(&self, folder: &Path) -> PathBuf {
608        if folder.is_absolute() {
609            folder.to_path_buf()
610        } else {
611            self.root.join(folder)
612        }
613    }
614
615    /// Walk a subtree for content `.md` files (skip hidden dirs, skip `index.md`
616    /// / `DB.md` / `log.md`), returning store-relative paths. Used by the layer
617    /// and type-folder walks.
618    fn walk_content_md(&self, root: &Path) -> Result<Vec<PathBuf>, StoreError> {
619        let mut out = Vec::new();
620        for entry in self.md_walker(root).build() {
621            let entry = entry.map_err(|e| StoreError::Search {
622                root: root.to_path_buf(),
623                message: e.to_string(),
624            })?;
625            if !is_file_entry(&entry) {
626                continue;
627            }
628            let path = entry.path();
629            if !has_md_extension(path) {
630                continue;
631            }
632            if is_non_content_basename(path) {
633                continue;
634            }
635            if let Some(rel) = self.rel_path(path) {
636                out.push(rel);
637            }
638        }
639        out.sort();
640        Ok(out)
641    }
642
643    /// Walk the whole store for **every** `.md` file (including `index.md`),
644    /// skipping hidden dirs and the `log/` archive tree. Used by the backlink
645    /// scan, where the literal link text can live in any markdown file.
646    fn walk_all_md(&self) -> Result<Vec<PathBuf>, StoreError> {
647        let mut out = Vec::new();
648        for entry in self.md_walker(&self.root).build() {
649            let entry = entry.map_err(|e| StoreError::Search {
650                root: self.root.clone(),
651                message: e.to_string(),
652            })?;
653            if !is_file_entry(&entry) {
654                continue;
655            }
656            let path = entry.path();
657            if !has_md_extension(path) {
658                continue;
659            }
660            if self.is_in_log_dir(path) {
661                continue;
662            }
663            if let Some(rel) = self.rel_path(path) {
664                out.push(rel);
665            }
666        }
667        out.sort();
668        Ok(out)
669    }
670
671    /// Read and merge every type-folder `index.jsonl` sidecar under `layer`
672    /// when given, else the whole store (skip hidden + `log/`). Each sidecar is
673    /// read with last-write-wins by path; across sidecars, paths are disjoint by
674    /// construction (one sidecar per folder), so a plain concatenation preserves
675    /// completeness. A layer scope confines the walk to `<root>/<layer>/`, which
676    /// is what keeps `find_by_where_in` O(entities-in-layer).
677    fn read_all_type_indexes_in(
678        &self,
679        layer: Option<Layer>,
680    ) -> Result<Vec<IndexRecord>, StoreError> {
681        let mut out = Vec::new();
682        for sidecar in self.find_type_index_files_in(layer)? {
683            out.extend(self.read_type_index(&self.abs_path(&sidecar))?);
684        }
685        Ok(out)
686    }
687
688    /// Locate every `index.jsonl` sidecar under `layer` (when given) else the
689    /// whole store (skip hidden + `log/`), returning store-relative paths. A
690    /// scoped read walks `<root>/<layer>/`; the store-wide read enumerates the
691    /// three canonical layer subtrees (`sources/`, `records/`, `wiki/`) — the
692    /// same store model [`Store::walk`] uses — rather than walking from
693    /// `self.root`. Walking from root would descend into non-layer top-level
694    /// dirs (`EXPECTED/` test goldens, an `archive/` of frozen index copies,
695    /// any sibling folder holding store-relative `path`s), pulling their
696    /// sidecars in and returning every record twice. A non-existent layer
697    /// subtree yields no sidecars rather than walking a missing path.
698    fn find_type_index_files_in(&self, layer: Option<Layer>) -> Result<Vec<PathBuf>, StoreError> {
699        // Store-wide read: union the per-layer scoped reads so only the three
700        // content layers are walked (never root meta files or non-layer dirs),
701        // matching `Store::walk`. The per-layer paths are disjoint by folder, so
702        // a plain concatenation preserves completeness.
703        let Some(layer) = layer else {
704            let mut out = Vec::new();
705            for l in Layer::all() {
706                out.extend(self.find_type_index_files_in(Some(l))?);
707            }
708            out.sort();
709            return Ok(out);
710        };
711        let walk_root = self.root.join(layer.dir_name());
712        // A scoped walk over a layer folder that does not exist yet must be an
713        // empty result, mirroring `walk_layer`'s missing-dir guard — not a walk
714        // error from `ignore` over a nonexistent path.
715        if !walk_root.is_dir() {
716            return Ok(Vec::new());
717        }
718        let mut out = Vec::new();
719        let mut builder = WalkBuilder::new(&walk_root);
720        builder
721            .standard_filters(false)
722            .hidden(true)
723            .follow_links(true);
724        for entry in builder.build() {
725            let entry = entry.map_err(|e| StoreError::Search {
726                root: walk_root.clone(),
727                message: e.to_string(),
728            })?;
729            if !is_file_entry(&entry) {
730                continue;
731            }
732            let path = entry.path();
733            if path.file_name().and_then(|n| n.to_str()) != Some(TYPE_INDEX_FILE) {
734                continue;
735            }
736            if self.is_in_log_dir(path) {
737                continue;
738            }
739            if let Some(rel) = self.rel_path(path) {
740                out.push(rel);
741            }
742        }
743        out.sort();
744        Ok(out)
745    }
746
747    /// A `WalkBuilder` configured for db.md SWEEPs: gitignore/global-ignore are
748    /// OFF (a SWEEP must see every file even if the store is a git repo with a
749    /// `.gitignore`), but hidden files/dirs are skipped. Symlinks are
750    /// **followed** (`follow_links(true)`) so a symlinked `.md` content file or
751    /// a symlinked type folder (e.g. `records/companies -> /other/disk/...`) is
752    /// walked like any other content rather than silently vanishing; a symlinked
753    /// layer dir was already traversed (the walk root is followed), so following
754    /// symlinks one level deeper just removes that inconsistency.
755    fn md_walker(&self, root: &Path) -> WalkBuilder {
756        let mut builder = WalkBuilder::new(root);
757        builder
758            .standard_filters(false)
759            .hidden(true)
760            .follow_links(true);
761        builder
762    }
763
764    /// True if an absolute path lives under the store's root-level `log/`
765    /// rotation-archive directory.
766    fn is_in_log_dir(&self, abs: &Path) -> bool {
767        match self.rel_path(abs) {
768            Some(rel) => rel.components().next().map(|c| c.as_os_str()) == Some("log".as_ref()),
769            None => false,
770        }
771    }
772
773    /// Read a file's frontmatter `updated` field as an RFC3339 timestamp,
774    /// returning `None` when absent/unparseable. A self-contained reader (does
775    /// not depend on the not-yet-implemented `parser::read_file`); parses the
776    /// leading `---`-fenced YAML block with the same engine the parser uses.
777    fn read_updated(&self, abs: &Path) -> Option<DateTime<FixedOffset>> {
778        let text = std::fs::read_to_string(abs).ok()?;
779        let yaml = frontmatter_block(&text)?;
780        let value: serde_norway::Value = serde_norway::from_str(yaml).ok()?;
781        let raw = value.get("updated")?;
782        value_to_datetime(raw)
783    }
784
785    /// The `<YYYY>/<MM>` shard segment for a sharding type, from its primary
786    /// date field with a `created` fallback. Reads the public `Frontmatter`
787    /// fields directly. `None` when no usable date is present.
788    fn primary_shard_segment(&self, type_: &str, fm: &Frontmatter) -> Option<(String, String)> {
789        // Try the type's primary date field first.
790        if let Some(field) = primary_date_field(type_) {
791            if let Some(v) = fm.extra.get(field) {
792                if let Some(seg) = value_to_year_month(v) {
793                    return Some(seg);
794                }
795            }
796        }
797        // Universal fallback: the typed `created` timestamp.
798        fm.created
799            .map(|dt| (format!("{:04}", dt.year()), format!("{:02}", dt.month())))
800    }
801}
802
803// ── Path containment (security) ─────────────────────────────────────────────
804
/// Resolve `candidate` and return it only when it lands inside `store_root`.
///
/// This is the within-store containment gate: a caller-influenced path (a
/// wiki-link target, a rename destination, ...) goes through here before it
/// is read or traversed.
///
/// Steps:
/// 1. Reject any `..` in the caller-influenced part of `candidate`. When
///    `candidate` lexically begins with `store_root`, that prefix is trusted
///    and only the remainder is inspected, so a root such as `../../store`
///    keeps working. Otherwise the whole candidate is inspected.
/// 2. Canonicalize `store_root`, so both sides of the comparison are fully
///    resolved (symlinked roots included).
/// 3. Canonicalize the longest prefix of `candidate` that exists on disk and
///    re-append the components that do not exist yet. If no prefix can be
///    canonicalized at all, the components are anchored at the canonical root
///    instead of the process working directory.
/// 4. Accept only when the resolved path starts with the canonical root.
///
/// # Errors
///
/// * `PermissionDenied` when the inspected part contains `..`, or when the
///   resolved path falls outside the store root.
/// * Any error from canonicalizing `store_root` itself (for example, the root
///   does not exist).
pub fn ensure_path_within_store(store_root: &Path, candidate: &Path) -> std::io::Result<PathBuf> {
    use std::path::Component;

    let denied =
        |message: String| std::io::Error::new(std::io::ErrorKind::PermissionDenied, message);

    // Only the part after a trusted, lexically matching root prefix is checked
    // for `..`. A `..` there cannot be normalized safely: with a symlink
    // earlier in the path, `foo/../bar` is not `bar`, and canonicalizing the
    // existing prefix below would quietly collapse the traversal.
    let scrutinized = match candidate.strip_prefix(store_root) {
        Ok(tail) => tail,
        Err(_) => candidate,
    };
    let climbs_out = scrutinized
        .components()
        .any(|part| part == Component::ParentDir);
    if climbs_out {
        return Err(denied(format!(
            "path {} contains a `..` component beyond the store root {} and cannot be contained",
            candidate.display(),
            store_root.display()
        )));
    }

    // Resolves symlinks and any `..` carried by the root itself.
    let canonical_root = store_root.canonicalize()?;

    // Peel trailing components until what is left can be canonicalized. The
    // peeled names are collected leaf-first.
    let mut probe = candidate.to_path_buf();
    let mut peeled: Vec<std::ffi::OsString> = Vec::new();
    let anchor = loop {
        if let Ok(real) = probe.canonicalize() {
            break real;
        }
        // No final name left to peel (empty path, a bare root/prefix, ...):
        // judge the peeled components against the store root.
        let Some(leaf) = probe.file_name().map(|name| name.to_os_string()) else {
            break canonical_root.clone();
        };
        peeled.push(leaf);
        if !probe.pop() {
            break canonical_root.clone();
        }
    };

    // Re-append the still-virtual tail in its original order.
    let mut resolved = anchor;
    resolved.extend(peeled.into_iter().rev());

    if !resolved.starts_with(&canonical_root) {
        return Err(denied(format!(
            "path {} resolves outside the store root {}",
            candidate.display(),
            store_root.display()
        )));
    }
    Ok(resolved)
}
913
914// ── The shared wiki-link edge notion (graph / stats / validate / rename) ─────
915//
916// One definition of "what `[[...]]` text is a real edge" that every relationship
917// op keys on, so `forwardlinks`, `backlinks`, `links`, `stats`, and `rename`
918// never disagree with each other (or with `validate`'s body extractor):
919//
920//   1. **Fence-aware.** A `[[...]]` inside a ``` / ~~~ fenced code block is a
921//      documentation example, not an edge — exactly `validate`'s rule. Counting
922//      it as an edge over-reports backlinks, falsely un-orphans the page, and
923//      (worst) lets `rename` rewrite verbatim example text.
924//   2. **Whitespace-trimmed.** `[[ records/contacts/sarah ]]` is the same edge
925//      as `[[records/contacts/sarah]]`. The inner padding is cosmetic; both the
926//      forward and the backward view must resolve it identically.
927//   3. **Case-folded to the filesystem.** Link *resolution* is `is_file()`,
928//      which is case-insensitive on macOS/Windows. So on a case-insensitive
929//      filesystem `[[records/contacts/Sarah-Chen]]` and the on-disk
930//      `sarah-chen.md` are the SAME edge; the comparison key must case-fold to
931//      match, or backlinks/rename silently miss the link while validate (which
932//      resolves via the filesystem) considers it fine.
933
/// Canonicalize a raw `[[...]]` inner target into the wiki-link key.
///
/// The result uses forward slashes, has no leading `./` or `/` in any
/// combination (`/./a`, `.//a` and `././a` all become `a`), has one trailing
/// `.md` removed, and is trimmed of surrounding whitespace. A leading `../`
/// is left untouched. Case is preserved; pair with [`link_edge_key`] for the
/// case-fold used when comparing edges.
pub fn canonical_link_target(raw: &str) -> String {
    let normalized = raw.trim().replace('\\', "/");

    // Strip leading `./` and `/` until neither is left. Doing the two strips
    // as separate one-shot passes would leave `./` behind on input such as
    // `/./a`. Working on a borrowed slice also avoids one allocation per
    // stripped prefix.
    let mut rest = normalized.as_str();
    loop {
        let stripped = rest
            .strip_prefix("./")
            .unwrap_or(rest)
            .trim_start_matches('/');
        if stripped.len() == rest.len() {
            break;
        }
        rest = stripped;
    }

    let rest = rest.strip_suffix(".md").unwrap_or(rest);
    rest.trim().to_string()
}
947
948/// The comparison key for a canonical link target: identity on a case-sensitive
949/// filesystem, ASCII-lowercased on a case-insensitive one (macOS/Windows), so
950/// the string-keyed edge comparison agrees with the filesystem's case-folding
951/// `is_file()` resolution. Callers compare `link_edge_key(a) == link_edge_key(b)`.
952pub fn link_edge_key(canonical_target: &str) -> String {
953    if fs_is_case_insensitive() {
954        canonical_target.to_ascii_lowercase()
955    } else {
956        canonical_target.to_string()
957    }
958}
959
960/// Extract every wiki-link edge target from a markdown body, fence-aware and
961/// whitespace-trimmed, in document order (duplicates kept — callers dedup).
962/// Returns canonical targets (see [`canonical_link_target`]); the case-fold for
963/// comparison is applied separately via [`link_edge_key`] so the canonical form
964/// (used for rewrites/output) stays case-preserving.
965///
966/// Scans line-by-line tracking the fence state inline (no whole-body
967/// allocation), exactly mirroring validate's `extract_wiki_links`: the fence
968/// state is a `(fence char, run length)` tracked via [`fence_opens`] /
969/// [`fence_closes`] — NOT a bool toggled on any ``` / `~~~` line. The naive
970/// toggle inverts mid-block when a `~~~` block legally contains a ```` ``` ````
971/// line (the standard way to document a backtick fence), or when a `>3`-space-
972/// indented ``` is mistaken for a fence — both of which would let a fenced
973/// example `[[…]]` leak out as a live edge (a false dependent for
974/// backlinks/rename). Fenced lines never yield edges. Within a line, the text
975/// before the first `|` is the target; a target whose trimmed form starts with
976/// `[` is the rejected triple-bracket flow-form list mis-encoding
977/// (`[[[a]], [[b]]]`), not a real link — skipped, matching validate.
978pub fn extract_edge_targets(body: &str) -> Vec<String> {
979    let mut out = Vec::new();
980    let mut fence: Option<(u8, usize)> = None;
981    for line in body.lines() {
982        let content = line.trim_end_matches('\r');
983        if let Some(f) = fence {
984            if fence_closes(content, f) {
985                fence = None;
986            }
987            continue;
988        }
989        if let Some(opened) = fence_opens(content) {
990            fence = Some(opened);
991            continue;
992        }
993        let bytes = line.as_bytes();
994        let mut i = 0usize;
995        while i + 1 < bytes.len() {
996            if bytes[i] == b'[' && bytes[i + 1] == b'[' {
997                if let Some(close) = line[i + 2..].find("]]") {
998                    let inner = &line[i + 2..i + 2 + close];
999                    let raw_target = inner.split('|').next().unwrap_or(inner).trim();
1000                    if !raw_target.is_empty() && !raw_target.starts_with('[') {
1001                        let canonical = canonical_link_target(raw_target);
1002                        if !canonical.is_empty() {
1003                            out.push(canonical);
1004                        }
1005                    }
1006                    i = i + 2 + close + 2;
1007                    continue;
1008                }
1009            }
1010            i += 1;
1011        }
1012    }
1013    out
1014}
1015
/// Decide whether `line` opens a fenced code block and, if so, return its
/// `(fence byte, run length)`.
///
/// A line opens a fence when, after at most three leading spaces, it starts
/// with a run of three or more backticks or three or more tildes. For a
/// backtick fence the text after the run (the info string) must not contain
/// a backtick. Anything else returns `None`.
pub fn fence_opens(line: &str) -> Option<(u8, usize)> {
    let after_indent = line.trim_start_matches(' ');
    if line.len() - after_indent.len() > 3 {
        return None;
    }

    let marker = *after_indent.as_bytes().first()?;
    if !matches!(marker, b'`' | b'~') {
        return None;
    }

    // `marker` is ASCII here, so counting bytes counts fence characters.
    let run = after_indent
        .bytes()
        .take_while(|&byte| byte == marker)
        .count();
    if run < 3 {
        return None;
    }

    let info = &after_indent[run..];
    if marker == b'`' && info.contains('`') {
        return None;
    }
    Some((marker, run))
}
1042
/// Decide whether `line` closes the open `fence` (`(fence byte, run length)`).
///
/// All three must hold: the line is indented by at most three spaces; after
/// the indent it starts with a run of the fence's own character at least as
/// long as the opening run; and only whitespace follows that run. A run of
/// the other fence character therefore never closes the block.
pub fn fence_closes(line: &str, fence: (u8, usize)) -> bool {
    let (marker, open_len) = fence;
    let after_indent = line.trim_start_matches(' ');
    let indent = line.len() - after_indent.len();
    let after_run = after_indent.trim_start_matches(char::from(marker));
    let run = after_indent.len() - after_run.len();

    indent <= 3 && run >= open_len && after_run.trim().is_empty()
}
1061
/// Report whether the filesystem holding the OS temp dir resolves names
/// case-insensitively.
///
/// The probe runs at most once per process and its answer is cached. It
/// creates a lowercase-named marker file in the temp dir (the name embeds the
/// process id and the current time in nanoseconds), asks whether the
/// uppercase spelling of that name is a file, then removes the marker. If
/// the marker cannot be created the answer is `false` (case-sensitive), so a
/// temp-dir problem never widens matching.
fn fs_is_case_insensitive() -> bool {
    use std::sync::OnceLock;
    static PROBE_RESULT: OnceLock<bool> = OnceLock::new();

    fn probe() -> bool {
        let tmp = std::env::temp_dir();
        let pid = std::process::id();
        let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
            Ok(elapsed) => elapsed.as_nanos(),
            Err(_) => 0,
        };
        let lower = tmp.join(format!(".dbmd-case-probe-{pid}-{nanos}"));
        let upper = tmp.join(format!(".DBMD-CASE-PROBE-{pid}-{nanos}"));

        // The uppercase spelling resolving to a file means the filesystem
        // folded the case.
        let folded = std::fs::File::create(&lower).is_ok() && upper.is_file();
        // Best-effort cleanup; a leftover marker is harmless.
        let _ = std::fs::remove_file(&lower);
        folded
    }

    *PROBE_RESULT.get_or_init(probe)
}
1089
1090// ── Free helpers (no `self`) ────────────────────────────────────────────────
1091
1092/// True if a walk entry is a regular file, **following symlinks** so a
1093/// symlinked `.md` content file (or a file inside a symlinked type folder) is
1094/// counted like any other content file.
1095///
1096/// The store walks enable `follow_links(true)`, so a symlink entry's
1097/// `file_type()` still reports `is_symlink()` (the `ignore` walker does not
1098/// rewrite the entry's own type), not the followed target's type. Treat a
1099/// symlink whose target is a regular file as a file: `stat` (follow) the path
1100/// and check. A broken symlink (no target) is not a file.
1101fn is_file_entry(entry: &ignore::DirEntry) -> bool {
1102    match entry.file_type() {
1103        Some(ft) if ft.is_file() => true,
1104        Some(ft) if ft.is_symlink() => std::fs::metadata(entry.path())
1105            .map(|m| m.is_file())
1106            .unwrap_or(false),
1107        // A `None` file type (the walk root itself) or a non-file/non-symlink
1108        // entry is not a content file.
1109        _ => false,
1110    }
1111}
1112
/// Report whether `path` has the extension `md`, compared case-sensitively
/// (`a.MD` does not match). A path with no extension does not match.
fn has_md_extension(path: &Path) -> bool {
    matches!(path.extension(), Some(ext) if ext == "md")
}
1118
1119/// True if the basename is a non-content meta file (`DB.md`, `index.md`,
1120/// `log.md`) that the content walks must skip.
1121fn is_non_content_basename(path: &Path) -> bool {
1122    match path.file_name().and_then(|n| n.to_str()) {
1123        Some(name) => NON_CONTENT_BASENAMES.contains(&name),
1124        None => false,
1125    }
1126}
1127
/// Return `name` with a `.md` suffix, adding one only when `name` does not
/// already end in `.md` (case-sensitive).
fn ensure_md_extension(name: &str) -> String {
    let mut file_name = String::with_capacity(name.len() + 3);
    file_name.push_str(name);
    if !name.ends_with(".md") {
        file_name.push_str(".md");
    }
    file_name
}
1136
/// The default store-relative folder for objects of type `type_`.
///
/// Recognized types map to a fixed `layer/folder` pair (see the table in the
/// body). Any other type falls back to `records/` joined with the type name
/// exactly as given, with no pluralization.
///
/// `wiki-page` maps to `wiki/topics` rather than bare `wiki`, so the result
/// always has a folder component below the layer.
fn default_type_folder(type_: &str) -> PathBuf {
    const KNOWN: [(&str, &str); 10] = [
        // sources
        ("email", "sources/emails"),
        ("transcript", "sources/transcripts"),
        ("pdf-source", "sources/docs"),
        // records: entities
        ("contact", "records/contacts"),
        ("company", "records/companies"),
        // records: events
        ("expense", "records/expenses"),
        ("meeting", "records/meetings"),
        ("decision", "records/decisions"),
        ("invoice", "records/invoices"),
        // wiki
        ("wiki-page", "wiki/topics"),
    ];

    KNOWN
        .iter()
        .find(|&&(name, _)| name == type_)
        .map(|&(_, folder)| PathBuf::from(folder))
        .unwrap_or_else(|| PathBuf::from("records").join(type_))
}
1170
1171/// The canonical [`Layer`] a `type_` belongs to, derived from its default
1172/// type-folder (`email` → `Sources`, `contact` → `Records`, `wiki-page` →
1173/// `Wiki`, unrecognized → `Records`). The write path uses this to decide whether
1174/// an agent-supplied folder is in the *right* layer for the type before honouring
1175/// its sub-folder choice.
1176pub fn layer_for_type(type_: &str) -> Layer {
1177    layer_of_folder(&default_type_folder(type_)).unwrap_or(Layer::Records)
1178}
1179
1180/// The [`Layer`] a type-folder path lives in, read from its first component
1181/// (`sources/` → `Sources`, `records/` → `Records`, `wiki/` → `Wiki`). Used to
1182/// bound [`Store::find_by_type`]'s whole-layer sidecar read to a single layer
1183/// subtree. Returns `None` for a path with no recognized layer prefix; every
1184/// value [`default_type_folder`] produces has one, so in practice this is
1185/// always `Some` on the call path — `None` degrades to a store-wide read.
1186fn layer_of_folder(folder: &Path) -> Option<Layer> {
1187    let first = folder.components().next()?.as_os_str().to_str()?;
1188    Layer::from_dir_name(first)
1189}
1190
/// Infer an object's `type` from its store-relative path.
///
/// The path must have at least three components, `layer/folder/file`, and
/// the layer must be `sources`, `records`, or `wiki`; otherwise the result
/// is `None`. Components beyond the second do not influence the type.
///
/// * Any folder under `wiki` infers `wiki-page`.
/// * A recognized `(layer, folder)` pair maps to its type name (for example
///   `sources/emails` to `email`).
/// * Any other folder under `sources` or `records` yields the folder name
///   verbatim, with no singularization, so `records/task` gives `task`,
///   mirroring the `records/` + bare-type fallback of `default_type_folder`.
pub fn infer_type_from_path(rel: &Path) -> Option<String> {
    // Only the first three UTF-8 components matter: layer, folder, and proof
    // that something sits below the folder.
    let parts: Vec<&str> = rel
        .components()
        .filter_map(|part| part.as_os_str().to_str())
        .take(3)
        .collect();
    let [layer, folder, _below] = parts.as_slice() else {
        return None;
    };

    let inferred = match (*layer, *folder) {
        ("wiki", _) => "wiki-page",
        ("sources", "emails") => "email",
        ("sources", "transcripts") => "transcript",
        ("sources", "docs") => "pdf-source",
        ("records", "contacts") => "contact",
        ("records", "companies") => "company",
        ("records", "expenses") => "expense",
        ("records", "meetings") => "meeting",
        ("records", "decisions") => "decision",
        ("records", "invoices") => "invoice",
        // Unrecognized folder in a known layer: the bare folder name.
        ("sources" | "records", other) => other,
        // Unknown leading layer.
        _ => return None,
    };
    Some(inferred.to_owned())
}
1237
/// The name of the frontmatter field that carries a type's primary date.
///
/// `None` means the type has no dedicated date field; the only caller in
/// view then falls back to the `created` timestamp.
fn primary_date_field(type_: &str) -> Option<&'static str> {
    let field = match type_ {
        "transcript" => "recorded_at",
        "pdf-source" => "received_at",
        "email" | "expense" | "invoice" | "meeting" => "date",
        _ => return None,
    };
    Some(field)
}
1251
1252/// Parse a YAML value into an RFC3339 [`DateTime`], accepting both an explicit
1253/// string and a YAML-native scalar rendered to string.
1254fn value_to_datetime(value: &serde_norway::Value) -> Option<DateTime<FixedOffset>> {
1255    let s = yaml_scalar_string(value)?;
1256    DateTime::parse_from_rfc3339(s.trim()).ok()
1257}
1258
1259/// Extract `(YYYY, MM)` from a YAML date/timestamp value. Lenient: matches a
1260/// leading `YYYY-MM` so a bare `2026-05-22` date and a full
1261/// `2026-05-22T10:00:00-07:00` timestamp both work.
1262fn value_to_year_month(value: &serde_norway::Value) -> Option<(String, String)> {
1263    let s = yaml_scalar_string(value)?;
1264    year_month_from_str(s.trim())
1265}
1266
/// Read `(YYYY, MM)` from the first seven bytes of `s`.
///
/// Those bytes must be four ASCII digits, a `-`, and two ASCII digits, and
/// the month must be in `01..=12`; anything after them is ignored. Returns
/// the year and month exactly as written (zero padding kept). Parsed by hand
/// so no regex is needed.
fn year_month_from_str(s: &str) -> Option<(String, String)> {
    let head = s.as_bytes().get(..7)?;
    let (year, dash_and_month) = head.split_at(4);
    let month = &dash_and_month[1..];

    let well_formed = dash_and_month[0] == b'-'
        && year.iter().chain(month).all(u8::is_ascii_digit);
    if !well_formed {
        return None;
    }

    let month_number = (month[0] - b'0') * 10 + (month[1] - b'0');
    if month_number == 0 || month_number > 12 {
        return None;
    }

    // The first seven bytes are ASCII, so these byte ranges are valid slices.
    Some((s[..4].to_owned(), s[5..7].to_owned()))
}
1292
1293/// Render a YAML scalar as a string: a real `String` verbatim, otherwise the
1294/// value's compact YAML serialization (covers timestamps that the YAML engine
1295/// may surface as a non-string scalar).
1296fn yaml_scalar_string(value: &serde_norway::Value) -> Option<String> {
1297    if let Some(s) = value.as_str() {
1298        return Some(s.to_string());
1299    }
1300    match value {
1301        serde_norway::Value::Null => None,
1302        serde_norway::Value::Mapping(_) | serde_norway::Value::Sequence(_) => None,
1303        other => serde_norway::to_string(other)
1304            .ok()
1305            .map(|s| s.trim().to_string()),
1306    }
1307}
1308
1309/// The YAML frontmatter block of a file: the text between a leading `---` fence
1310/// and the next `---` fence, exclusive. `None` if the file does not open with a
1311/// `---` fence on its first line.
1312fn frontmatter_block(text: &str) -> Option<&str> {
1313    // Tolerate a UTF-8 BOM and CRLF, but the fence must be the very first line.
1314    let body = text.strip_prefix('\u{feff}').unwrap_or(text);
1315    let mut rest = body;
1316    // First line must be exactly `---` (allowing trailing CR).
1317    let (first, after_first) = split_first_line(rest);
1318    if first.trim_end_matches('\r') != "---" {
1319        return None;
1320    }
1321    rest = after_first;
1322    let block_start = rest;
1323    let mut scanned = 0usize;
1324    loop {
1325        let (line, after) = split_first_line(rest);
1326        if line.trim_end_matches('\r') == "---" {
1327            return Some(&block_start[..scanned]);
1328        }
1329        if after.is_empty() && line.is_empty() {
1330            // Reached end of input without a closing fence.
1331            return None;
1332        }
1333        scanned += line.len() + 1; // +1 for the consumed '\n'
1334        if after.is_empty() {
1335            return None;
1336        }
1337        rest = after;
1338    }
1339}
1340
/// Cut a string at its first `\n`.
///
/// Returns `(line, remainder)`: the text before the newline and the text after
/// it, with the newline itself dropped. When there is no newline the whole
/// input is the line and the remainder is empty.
fn split_first_line(s: &str) -> (&str, &str) {
    s.split_once('\n').unwrap_or((s, ""))
}
1350
1351/// True if an [`IndexRecord`] has a field `key` equal to `value`, checking the
1352/// typed columns first and then the flattened `fields` map.
1353fn record_matches_field(record: &IndexRecord, key: &str, value: &str) -> bool {
1354    match key {
1355        "type" => record.type_ == value,
1356        "summary" => record.summary == value,
1357        "path" => record.path.to_string_lossy() == value,
1358        "created" => timestamp_matches(record.created, value),
1359        "updated" => timestamp_matches(record.updated, value),
1360        "tags" => record.tags.iter().any(|t| t == value),
1361        "links" => record.links.iter().any(|l| l == value),
1362        other => record
1363            .fields
1364            .get(other)
1365            .map(|v| json_value_matches(v, value))
1366            .unwrap_or(false),
1367    }
1368}
1369
1370/// Compare a record's `created`/`updated` instant against a query `value`.
1371///
1372/// db.md files write timestamps in several equivalent RFC3339 spellings — most
1373/// commonly the `Z` UTC designator (`2026-05-01T00:00:00Z`) but also an explicit
1374/// offset (`...+00:00`, `...-07:00`). A naive `record.created.to_rfc3339() ==
1375/// value` reformats only one side: chrono renders a UTC instant as `+00:00`, so
1376/// the `Z` form an agent reads straight out of the file would never match. We
1377/// instead parse `value` as RFC3339 and compare instants, where `Z` and `+00:00`
1378/// (and any same-instant offset) are equal. A `value` that is not valid RFC3339
1379/// can never equal a real timestamp, so it falls through to `false`.
1380fn timestamp_matches(stored: Option<DateTime<FixedOffset>>, value: &str) -> bool {
1381    match (stored, DateTime::parse_from_rfc3339(value)) {
1382        (Some(stored), Ok(queried)) => stored == queried,
1383        _ => false,
1384    }
1385}
1386
1387/// Compare a JSON field value against a query string. A string matches
1388/// verbatim; scalars match their textual form; an array matches if any element
1389/// matches (so a list-valued frontmatter field is membership-queried).
1390fn json_value_matches(v: &serde_json::Value, value: &str) -> bool {
1391    match v {
1392        serde_json::Value::String(s) => s == value,
1393        serde_json::Value::Bool(b) => b.to_string() == value,
1394        serde_json::Value::Number(n) => n.to_string() == value,
1395        serde_json::Value::Array(items) => items.iter().any(|i| json_value_matches(i, value)),
1396        // A present-but-null field never matches — consistent with the in-memory
1397        // post-filter (`query::json_value_matches`, which the first `where`
1398        // clause is NOT re-checked against, so the two must agree here or a
1399        // `--where field=` query would return different rows than `--type X
1400        // --where field=`).
1401        serde_json::Value::Null => false,
1402        serde_json::Value::Object(_) => false,
1403    }
1404}
1405
1406#[cfg(test)]
1407mod tests {
1408    use super::*;
1409    use std::fs;
1410    use tempfile::{tempdir, TempDir};
1411
1412    // ── Fixtures ────────────────────────────────────────────────────────────
1413
1414    /// Write `contents` to `<root>/<rel>`, creating parent dirs. Returns the
1415    /// store-relative path for convenient assertions.
1416    fn write(root: &Path, rel: &str, contents: &str) -> PathBuf {
1417        let abs = root.join(rel);
1418        fs::create_dir_all(abs.parent().unwrap()).unwrap();
1419        fs::write(&abs, contents).unwrap();
1420        PathBuf::from(rel)
1421    }
1422
1423    /// A minimal content file with the given `updated` timestamp in frontmatter.
1424    fn content_md(updated: &str) -> String {
1425        format!(
1426            "---\ntype: note\ncreated: {updated}\nupdated: {updated}\nsummary: a note\n---\n\nbody\n"
1427        )
1428    }
1429
1430    /// A bare directory with a `DB.md` marker (valid `db-md` frontmatter so the
1431    /// real parser is exercised).
1432    fn empty_store() -> TempDir {
1433        let dir = tempdir().unwrap();
1434        fs::write(
1435            dir.path().join("DB.md"),
1436            "---\ntype: db-md\nscope: company\nowner: Test\n---\n\n# Store\n",
1437        )
1438        .unwrap();
1439        dir
1440    }
1441
1442    /// Open a store rooted at a TempDir; panics if `open` rejects it.
1443    fn open(dir: &TempDir) -> Store {
1444        Store::open(dir.path()).expect("fixture should be a valid store")
1445    }
1446
1447    fn rels(paths: &[PathBuf]) -> Vec<String> {
1448        paths
1449            .iter()
1450            .map(|p| p.to_string_lossy().replace('\\', "/"))
1451            .collect()
1452    }
1453
1454    // ── Layer ───────────────────────────────────────────────────────────────
1455
1456    #[test]
1457    fn layer_dir_name_and_parse_are_inverse() {
1458        for layer in Layer::all() {
1459            assert_eq!(Layer::from_dir_name(layer.dir_name()), Some(layer));
1460        }
1461        assert_eq!(Layer::Sources.dir_name(), "sources");
1462        assert_eq!(Layer::Records.dir_name(), "records");
1463        assert_eq!(Layer::Wiki.dir_name(), "wiki");
1464        assert_eq!(Layer::from_dir_name("log"), None);
1465        assert_eq!(Layer::from_dir_name("Sources"), None); // case-sensitive
1466    }
1467
1468    #[test]
1469    fn layer_order_is_canonical() {
1470        // stats keys a BTreeMap on Layer; the sort order must be sources<records<wiki.
1471        let mut v = [Layer::Wiki, Layer::Sources, Layer::Records];
1472        v.sort();
1473        assert_eq!(v, [Layer::Sources, Layer::Records, Layer::Wiki]);
1474    }
1475
1476    // ── is_db_md_store / open ────────────────────────────────────────────────
1477
1478    #[test]
1479    fn is_store_true_only_with_uppercase_marker() {
1480        let dir = tempdir().unwrap();
1481        assert!(
1482            !Store::is_db_md_store(dir.path()),
1483            "no marker → not a store"
1484        );
1485
1486        fs::write(dir.path().join("DB.md"), "---\ntype: db-md\n---\n").unwrap();
1487        assert!(Store::is_db_md_store(dir.path()), "uppercase DB.md → store");
1488    }
1489
1490    #[test]
1491    fn is_store_false_for_lowercase_db_md() {
1492        // The case-sensitivity contract: a lowercase db.md is the spec name, not
1493        // a marker — even on a case-insensitive filesystem where Path::exists
1494        // would lie. This test must pass on macOS (case-insensitive) too.
1495        let dir = tempdir().unwrap();
1496        fs::write(dir.path().join("db.md"), "---\ntype: db-md\n---\n").unwrap();
1497        assert!(
1498            !Store::is_db_md_store(dir.path()),
1499            "lowercase db.md must NOT be treated as a store marker"
1500        );
1501        assert!(Store::open(dir.path()).is_err());
1502    }
1503
1504    #[test]
1505    fn is_store_false_when_db_md_is_a_directory() {
1506        let dir = tempdir().unwrap();
1507        fs::create_dir(dir.path().join("DB.md")).unwrap();
1508        assert!(
1509            !Store::is_db_md_store(dir.path()),
1510            "a directory named DB.md is not the file marker"
1511        );
1512    }
1513
1514    #[test]
1515    fn open_rejects_non_store_with_path() {
1516        let dir = tempdir().unwrap();
1517        let err = Store::open(dir.path()).unwrap_err();
1518        assert_eq!(err.path, dir.path());
1519    }
1520
1521    #[test]
1522    fn open_succeeds_and_parses_config() {
1523        let dir = tempdir().unwrap();
1524        // A DB.md whose ## Policies declares a frozen page — proves open()
1525        // actually parsed the config rather than substituting a default.
1526        fs::write(
1527            dir.path().join("DB.md"),
1528            "---\ntype: db-md\nscope: company\nowner: Test\n---\n\n# Store\n\n\
1529             ## Policies\n\n### Frozen pages\n- records/decisions/q1.md\n",
1530        )
1531        .unwrap();
1532        let store = Store::open(dir.path()).unwrap();
1533        assert_eq!(store.root, dir.path());
1534        assert!(
1535            store
1536                .config
1537                .frozen_pages
1538                .iter()
1539                .any(|p| p == Path::new("records/decisions/q1.md")),
1540            "open() must surface DB.md ## Policies, got {:?}",
1541            store.config.frozen_pages
1542        );
1543    }
1544
1545    // ── walk / walk_layer / walk_type_folder ─────────────────────────────────
1546
1547    #[test]
1548    fn walk_collects_content_across_layers_skipping_meta_and_log() {
1549        let dir = empty_store();
1550        let root = dir.path();
1551        write(
1552            root,
1553            "sources/emails/2026/05/a.md",
1554            &content_md("2026-05-01T00:00:00Z"),
1555        );
1556        write(
1557            root,
1558            "records/contacts/sarah.md",
1559            &content_md("2026-05-02T00:00:00Z"),
1560        );
1561        write(
1562            root,
1563            "wiki/people/sarah.md",
1564            &content_md("2026-05-03T00:00:00Z"),
1565        );
1566        // Things walk() must SKIP:
1567        write(root, "sources/emails/index.md", "---\ntype: index\n---\n"); // catalog
1568        write(root, "index.md", "---\ntype: index\n---\n"); // root catalog
1569        write(root, "log.md", "---\ntype: log\n---\n"); // log
1570        write(root, "log/2026-04.md", "---\ntype: log\n---\n"); // rotated log archive
1571        write(
1572            root,
1573            "sources/.hidden/secret.md",
1574            &content_md("2026-05-09T00:00:00Z"),
1575        ); // hidden dir
1576        write(root, "records/contacts/notes.txt", "not markdown"); // non-md
1577
1578        let store = open(&dir);
1579        let got = rels(&store.walk().unwrap());
1580        assert_eq!(
1581            got,
1582            vec![
1583                "records/contacts/sarah.md".to_string(),
1584                "sources/emails/2026/05/a.md".to_string(),
1585                "wiki/people/sarah.md".to_string(),
1586            ]
1587        );
1588    }
1589
1590    #[test]
1591    fn walk_includes_content_named_log_md_or_db_md_inside_a_layer() {
1592        let dir = empty_store();
1593        let root = dir.path();
1594        // A content file that merely happens to be named log.md / DB.md INSIDE a
1595        // layer is real content — those names are reserved only at the store root.
1596        write(
1597            root,
1598            "records/configs/log.md",
1599            &content_md("2026-05-01T00:00:00Z"),
1600        );
1601        write(
1602            root,
1603            "sources/docs/DB.md",
1604            &content_md("2026-05-02T00:00:00Z"),
1605        );
1606        // The derived catalog twin is still skipped at any depth.
1607        write(root, "records/configs/index.md", "---\ntype: index\n---\n");
1608        let store = open(&dir);
1609        let got = rels(&store.walk().unwrap());
1610        assert!(
1611            got.contains(&"records/configs/log.md".to_string()),
1612            "layer-internal log.md is content: {got:?}"
1613        );
1614        assert!(
1615            got.contains(&"sources/docs/DB.md".to_string()),
1616            "layer-internal DB.md is content: {got:?}"
1617        );
1618        assert!(
1619            !got.iter().any(|p| p.ends_with("index.md")),
1620            "index.md is still skipped: {got:?}"
1621        );
1622    }
1623
1624    #[test]
1625    fn walk_layer_is_scoped() {
1626        let dir = empty_store();
1627        let root = dir.path();
1628        write(
1629            root,
1630            "sources/emails/2026/05/a.md",
1631            &content_md("2026-05-01T00:00:00Z"),
1632        );
1633        write(
1634            root,
1635            "records/contacts/sarah.md",
1636            &content_md("2026-05-02T00:00:00Z"),
1637        );
1638        let store = open(&dir);
1639
1640        assert_eq!(
1641            rels(&store.walk_layer(Layer::Sources).unwrap()),
1642            vec!["sources/emails/2026/05/a.md".to_string()]
1643        );
1644        assert_eq!(
1645            rels(&store.walk_layer(Layer::Records).unwrap()),
1646            vec!["records/contacts/sarah.md".to_string()]
1647        );
1648        // A layer with no directory is empty, not an error.
1649        assert!(store.walk_layer(Layer::Wiki).unwrap().is_empty());
1650    }
1651
1652    #[test]
1653    fn walk_type_folder_recurses_shards_and_accepts_abs_or_rel() {
1654        let dir = empty_store();
1655        let root = dir.path();
1656        write(
1657            root,
1658            "sources/emails/2026/05/a.md",
1659            &content_md("2026-05-01T00:00:00Z"),
1660        );
1661        write(
1662            root,
1663            "sources/emails/2026/06/b.md",
1664            &content_md("2026-06-01T00:00:00Z"),
1665        );
1666        write(root, "sources/emails/index.md", "---\ntype: index\n---\n"); // skipped
1667                                                                           // A different type folder must not leak in.
1668        write(
1669            root,
1670            "sources/docs/2026/05/c.md",
1671            &content_md("2026-05-04T00:00:00Z"),
1672        );
1673        let store = open(&dir);
1674
1675        let expected = vec![
1676            "sources/emails/2026/05/a.md".to_string(),
1677            "sources/emails/2026/06/b.md".to_string(),
1678        ];
1679        // Relative folder arg.
1680        assert_eq!(
1681            rels(&store.walk_type_folder(Path::new("sources/emails")).unwrap()),
1682            expected
1683        );
1684        // Absolute folder arg under the store resolves identically.
1685        assert_eq!(
1686            rels(
1687                &store
1688                    .walk_type_folder(&root.join("sources/emails"))
1689                    .unwrap()
1690            ),
1691            expected
1692        );
1693    }
1694
1695    // ── recent_in_type_folder ────────────────────────────────────────────────
1696
1697    #[test]
1698    fn recent_orders_by_updated_desc_then_path_and_caps() {
1699        let dir = empty_store();
1700        let root = dir.path();
1701        // newest
1702        write(
1703            root,
1704            "records/meetings/2026/05/c.md",
1705            &content_md("2026-05-03T00:00:00Z"),
1706        );
1707        // tie on updated — path asc decides (a before b)
1708        write(
1709            root,
1710            "records/meetings/2026/05/a.md",
1711            &content_md("2026-05-02T00:00:00Z"),
1712        );
1713        write(
1714            root,
1715            "records/meetings/2026/05/b.md",
1716            &content_md("2026-05-02T00:00:00Z"),
1717        );
1718        // oldest
1719        write(
1720            root,
1721            "records/meetings/2026/04/z.md",
1722            &content_md("2026-04-01T00:00:00Z"),
1723        );
1724        let store = open(&dir);
1725
1726        let all = rels(
1727            &store
1728                .recent_in_type_folder(Path::new("records/meetings"), 10)
1729                .unwrap(),
1730        );
1731        assert_eq!(
1732            all,
1733            vec![
1734                "records/meetings/2026/05/c.md".to_string(), // newest
1735                "records/meetings/2026/05/a.md".to_string(), // tie, path asc
1736                "records/meetings/2026/05/b.md".to_string(),
1737                "records/meetings/2026/04/z.md".to_string(), // oldest
1738            ]
1739        );
1740
1741        // Cap takes the n most-recent.
1742        let top2 = rels(
1743            &store
1744                .recent_in_type_folder(Path::new("records/meetings"), 2)
1745                .unwrap(),
1746        );
1747        assert_eq!(
1748            top2,
1749            vec![
1750                "records/meetings/2026/05/c.md".to_string(),
1751                "records/meetings/2026/05/a.md".to_string(),
1752            ]
1753        );
1754    }
1755
1756    #[test]
1757    fn recent_sorts_undated_files_last() {
1758        let dir = empty_store();
1759        let root = dir.path();
1760        write(
1761            root,
1762            "records/contacts/dated.md",
1763            &content_md("2026-05-01T00:00:00Z"),
1764        );
1765        // No `updated` field at all.
1766        write(
1767            root,
1768            "records/contacts/undated.md",
1769            "---\ntype: contact\nsummary: x\n---\nbody\n",
1770        );
1771        let store = open(&dir);
1772        let got = rels(
1773            &store
1774                .recent_in_type_folder(Path::new("records/contacts"), 10)
1775                .unwrap(),
1776        );
1777        assert_eq!(
1778            got,
1779            vec![
1780                "records/contacts/dated.md".to_string(),
1781                "records/contacts/undated.md".to_string(),
1782            ],
1783            "a file with a real `updated` must outrank one with none"
1784        );
1785    }
1786
1787    // ── type_shards ──────────────────────────────────────────────────────────
1788
1789    #[test]
1790    fn type_shards_classification() {
1791        let dir = empty_store();
1792        let store = open(&dir);
1793        for t in [
1794            "email",
1795            "transcript",
1796            "pdf-source",
1797            "expense",
1798            "invoice",
1799            "meeting",
1800            "order",
1801            "ticket",
1802            "transaction",
1803        ] {
1804            assert!(store.type_shards(t), "{t} should shard");
1805        }
1806        for t in [
1807            "contact",
1808            "company",
1809            "decision",
1810            "wiki-page",
1811            "index",
1812            "log",
1813            "db-md",
1814            "proposal",
1815        ] {
1816            assert!(!store.type_shards(t), "{t} should stay flat");
1817        }
1818    }
1819
1820    #[test]
1821    fn type_shards_respects_schema_directive_both_directions() {
1822        use crate::parser::{Config, Schema};
1823        let dir = empty_store();
1824        let mut store = open(&dir);
1825        let mut config = Config::default();
1826        // A CUSTOM type (not in the built-in list) opts into date-sharding —
1827        // without the schema override `type_shards` would return false for it.
1828        config.schemas.insert(
1829            "shipment".to_string(),
1830            Schema {
1831                shard: Some(true),
1832                ..Schema::default()
1833            },
1834        );
1835        // A BUILT-IN event type opts OUT (flat) — the override wins over the
1836        // built-in default.
1837        config.schemas.insert(
1838            "expense".to_string(),
1839            Schema {
1840                shard: Some(false),
1841                ..Schema::default()
1842            },
1843        );
1844        // A schema with no `shard:` directive leaves the built-in default intact.
1845        config
1846            .schemas
1847            .insert("meeting".to_string(), Schema::default());
1848        store.config = config;
1849
1850        assert!(
1851            store.type_shards("shipment"),
1852            "custom type with `shard: by-date` must shard"
1853        );
1854        assert!(
1855            !store.type_shards("expense"),
1856            "built-in event type with `shard: flat` must go flat"
1857        );
1858        assert!(
1859            store.type_shards("meeting"),
1860            "schema without a `shard:` directive keeps the built-in default"
1861        );
1862        assert!(
1863            !store.type_shards("contact"),
1864            "unconfigured entity type stays flat"
1865        );
1866    }
1867
1868    // ── shard_path_for ───────────────────────────────────────────────────────
1869
1870    fn fm_with_extra(key: &str, value: &str) -> Frontmatter {
1871        let mut fm = Frontmatter::default();
1872        fm.extra.insert(
1873            key.to_string(),
1874            serde_norway::Value::String(value.to_string()),
1875        );
1876        fm
1877    }
1878
1879    fn fm_with_created(rfc3339: &str) -> Frontmatter {
1880        Frontmatter {
1881            created: Some(DateTime::parse_from_rfc3339(rfc3339).unwrap()),
1882            ..Default::default()
1883        }
1884    }
1885
1886    #[test]
1887    fn shard_path_uses_primary_date_field_per_type() {
1888        let dir = empty_store();
1889        let store = open(&dir);
1890
1891        // expense.date → records/expenses/<YYYY>/<MM>/
1892        let p = store
1893            .shard_path_for("expense", &fm_with_extra("date", "2026-05-22"), "lunch")
1894            .unwrap();
1895        assert_eq!(p, PathBuf::from("records/expenses/2026/05/lunch.md"));
1896
1897        // email.date → sources/emails/<YYYY>/<MM>/
1898        let p = store
1899            .shard_path_for(
1900                "email",
1901                &fm_with_extra("date", "2026-11-02T09:00:00-07:00"),
1902                "e1",
1903            )
1904            .unwrap();
1905        assert_eq!(p, PathBuf::from("sources/emails/2026/11/e1.md"));
1906
1907        // transcript.recorded_at → sources/transcripts/<YYYY>/<MM>/
1908        let p = store
1909            .shard_path_for(
1910                "transcript",
1911                &fm_with_extra("recorded_at", "2025-01-15T12:00:00Z"),
1912                "t1",
1913            )
1914            .unwrap();
1915        assert_eq!(p, PathBuf::from("sources/transcripts/2025/01/t1.md"));
1916    }
1917
1918    #[test]
1919    fn shard_path_falls_back_to_created() {
1920        let dir = empty_store();
1921        let store = open(&dir);
1922        // meeting with no `date` field but a `created` timestamp.
1923        let p = store
1924            .shard_path_for(
1925                "meeting",
1926                &fm_with_created("2024-07-09T08:30:00-04:00"),
1927                "sync",
1928            )
1929            .unwrap();
1930        assert_eq!(p, PathBuf::from("records/meetings/2024/07/sync.md"));
1931    }
1932
1933    #[test]
1934    fn shard_path_primary_field_wins_over_created() {
1935        let dir = empty_store();
1936        let store = open(&dir);
1937        let mut fm = fm_with_created("2020-01-01T00:00:00Z");
1938        fm.extra.insert(
1939            "date".into(),
1940            serde_norway::Value::String("2026-05-22".into()),
1941        );
1942        let p = store.shard_path_for("expense", &fm, "x").unwrap();
1943        // The primary `date` (2026/05), not `created` (2020/01), drives the shard.
1944        assert_eq!(p, PathBuf::from("records/expenses/2026/05/x.md"));
1945    }
1946
1947    #[test]
1948    fn shard_path_flat_types_have_no_shard_segment() {
1949        let dir = empty_store();
1950        let store = open(&dir);
1951        // A contact has a `created` date, but contacts stay flat.
1952        let p = store
1953            .shard_path_for(
1954                "contact",
1955                &fm_with_created("2026-05-22T00:00:00Z"),
1956                "sarah-chen",
1957            )
1958            .unwrap();
1959        assert_eq!(p, PathBuf::from("records/contacts/sarah-chen.md"));
1960
1961        // wiki-page is flat (no date shard) but still files under a type-folder:
1962        // `wiki/topics/<name>.md`, NEVER flat as `wiki/<name>.md`. A 2-component
1963        // path is invisible to the index/validate type-folder model.
1964        let p = store
1965            .shard_path_for("wiki-page", &Frontmatter::default(), "renewal-theme")
1966            .unwrap();
1967        assert_eq!(p, PathBuf::from("wiki/topics/renewal-theme.md"));
1968    }
1969
1970    /// Regression: a wiki-page written through the toolkit's own path
1971    /// computation must land at a path the index + validate type-folder model
1972    /// accepts. `shard_path_for("wiki-page", …)` previously returned a
1973    /// 2-component `wiki/<file>` path, which `type_folder_of` (in both `index`
1974    /// and `validate`) treats as "no type-folder" — so the page either crashed
1975    /// `Index::on_write` (it tried to create `index.md` inside a file) or was
1976    /// silently dropped from every catalog by `Index::rebuild_all`. The
1977    /// computed path must have 3 components: `<layer>/<type-folder>/<file>`.
1978    #[test]
1979    fn shard_path_wiki_page_is_indexable_three_component_path() {
1980        let dir = empty_store();
1981        let store = open(&dir);
1982        let p = store
1983            .shard_path_for("wiki-page", &Frontmatter::default(), "renewal-theme")
1984            .unwrap();
1985        // First two components are a layer + a non-empty type-folder segment;
1986        // the file is the third. This is exactly the shape `type_folder_of`
1987        // (`comps.len() >= 3`, `comps[0]` a known layer) requires.
1988        let comps: Vec<&str> = p.iter().filter_map(|c| c.to_str()).collect();
1989        assert_eq!(
1990            comps.len(),
1991            3,
1992            "wiki-page path must be <layer>/<type-folder>/<file>, got {p:?}"
1993        );
1994        assert_eq!(comps[0], "wiki", "first component must be the wiki layer");
1995        assert!(
1996            !comps[1].is_empty() && comps[1] != "renewal-theme.md",
1997            "second component must be a real type-folder, not the file: {p:?}"
1998        );
1999        assert!(
2000            comps[2].ends_with(".md"),
2001            "third component must be the .md file: {p:?}"
2002        );
2003    }
2004
2005    #[test]
2006    fn shard_path_preserves_and_adds_md_extension() {
2007        let dir = empty_store();
2008        let store = open(&dir);
2009        let with = store
2010            .shard_path_for("contact", &Frontmatter::default(), "sarah.md")
2011            .unwrap();
2012        let without = store
2013            .shard_path_for("contact", &Frontmatter::default(), "sarah")
2014            .unwrap();
2015        assert_eq!(with, PathBuf::from("records/contacts/sarah.md"));
2016        assert_eq!(without, PathBuf::from("records/contacts/sarah.md"));
2017    }
2018
2019    #[test]
2020    fn shard_path_errors_when_sharding_type_has_no_date() {
2021        let dir = empty_store();
2022        let store = open(&dir);
2023        // expense shards, but no `date` and no `created` → NoShardDate.
2024        let err = store
2025            .shard_path_for("expense", &Frontmatter::default(), "mystery")
2026            .unwrap_err();
2027        match err {
2028            StoreError::NoShardDate { file } => {
2029                assert_eq!(file, PathBuf::from("records/expenses/mystery.md"));
2030            }
2031            other => panic!("expected NoShardDate, got {other:?}"),
2032        }
2033    }
2034
2035    // ── find_links_to ────────────────────────────────────────────────────────
2036
2037    #[test]
2038    fn find_links_to_matches_all_accepted_spellings() {
2039        let dir = empty_store();
2040        let root = dir.path();
2041        let target = "records/contacts/sarah-chen";
2042
2043        // Plain link.
2044        write(
2045            root,
2046            "wiki/people/sarah.md",
2047            &format!("---\ntype: wiki-page\nsummary: s\n---\nSee [[{target}]].\n"),
2048        );
2049        // Link with display text.
2050        write(
2051            root,
2052            "records/meetings/2026/05/m.md",
2053            &format!("---\ntype: meeting\nsummary: s\n---\nWith [[{target}|Sarah]].\n"),
2054        );
2055        // Link with .md extension (accepted, warned by validate).
2056        write(
2057            root,
2058            "wiki/themes/t.md",
2059            &format!("---\ntype: wiki-page\nsummary: s\n---\n[[{target}.md]]\n"),
2060        );
2061        // A catalog/index file also contains the link literally — included.
2062        write(
2063            root,
2064            "records/contacts/index.md",
2065            &format!("---\ntype: index\n---\n- [[{target}]] — Sarah\n"),
2066        );
2067        // No link to the target.
2068        write(
2069            root,
2070            "wiki/people/elena.md",
2071            "---\ntype: wiki-page\nsummary: s\n---\nNo links here.\n",
2072        );
2073        // Short-form link must NOT match the full-path target.
2074        write(
2075            root,
2076            "wiki/people/bob.md",
2077            "---\ntype: wiki-page\nsummary: s\n---\n[[sarah-chen]]\n",
2078        );
2079        // A longer path that merely starts with the target must NOT match
2080        // (boundary correctness): target `sarah-chen` vs `sarah-chen-jr`.
2081        write(
2082            root,
2083            "wiki/people/jr.md",
2084            &format!("---\ntype: wiki-page\nsummary: s\n---\n[[{target}-jr]]\n"),
2085        );
2086
2087        let store = open(&dir);
2088        let got = rels(&store.find_links_to(Path::new(target)).unwrap());
2089        assert_eq!(
2090            got,
2091            vec![
2092                "records/contacts/index.md".to_string(),
2093                "records/meetings/2026/05/m.md".to_string(),
2094                "wiki/people/sarah.md".to_string(),
2095                "wiki/themes/t.md".to_string(),
2096            ]
2097        );
2098    }
2099
2100    #[test]
2101    fn find_links_to_distinguishes_sibling_paths() {
2102        // Two contacts whose paths share a prefix; a link to one must not be
2103        // reported as a link to the other.
2104        let dir = empty_store();
2105        let root = dir.path();
2106        write(
2107            root,
2108            "wiki/a.md",
2109            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah]]\n",
2110        );
2111        write(
2112            root,
2113            "wiki/b.md",
2114            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
2115        );
2116        let store = open(&dir);
2117
2118        assert_eq!(
2119            rels(
2120                &store
2121                    .find_links_to(Path::new("records/contacts/sarah"))
2122                    .unwrap()
2123            ),
2124            vec!["wiki/a.md".to_string()]
2125        );
2126        assert_eq!(
2127            rels(
2128                &store
2129                    .find_links_to(Path::new("records/contacts/sarah-chen"))
2130                    .unwrap()
2131            ),
2132            vec!["wiki/b.md".to_string()]
2133        );
2134    }
2135
2136    #[test]
2137    fn regression_find_links_to_tolerates_invalid_utf8_on_a_matched_line() {
2138        // Regression: a `.md` file can carry a stray non-UTF-8 byte on the SAME
2139        // line as a `[[target]]` link (a verbatim-ingested `sources/` artifact,
2140        // e.g. a mis-decoded Latin-1 import). The scan must still report the
2141        // link — `find_links_to` / `find_links_to_any` (and `graph backlinks` +
2142        // the working-set validate incoming-linker pass) must not error out and
2143        // drop the legitimate UTF-8 linkers. The content scan reads the file
2144        // with `String::from_utf8_lossy`, so the invalid byte becomes a
2145        // replacement char and the ASCII `[[target]]` link is still extracted.
2146        let dir = empty_store();
2147        let root = dir.path();
2148        let target = "records/contacts/sarah-chen";
2149
2150        // A clean, fully-UTF-8 linker that MUST be returned regardless.
2151        write(
2152            root,
2153            "wiki/people/clean.md",
2154            &format!("---\ntype: wiki-page\nsummary: s\n---\nSee [[{target}]].\n"),
2155        );
2156
2157        // A linker whose link line ALSO carries a stray 0xFF byte (a mis-decoded
2158        // Latin-1 import). Write raw bytes so the invalid byte survives — a
2159        // `&str` fixture could not express it. The byte-level regex still
2160        // matches `[[target]]` on this line; pre-fix the UTF8 sink aborted here.
2161        let mut bytes: Vec<u8> =
2162            b"---\ntype: email\nsummary: s\n---\nSee [[records/contacts/sarah-chen]] \xFF here\n"
2163                .to_vec();
2164        let dirty_abs = root.join("sources/emails/2026/05/raw.md");
2165        fs::create_dir_all(dirty_abs.parent().unwrap()).unwrap();
2166        fs::write(&dirty_abs, &bytes).unwrap();
2167        // Defensive: confirm the fixture really is invalid UTF-8 (so the test
2168        // exercises the bug, not a coincidentally-valid file).
2169        assert!(
2170            std::str::from_utf8(&bytes).is_err(),
2171            "fixture must contain invalid UTF-8 to exercise the regression"
2172        );
2173        bytes.clear();
2174
2175        let store = open(&dir);
2176        let got = rels(
2177            &store
2178                .find_links_to(Path::new(target))
2179                .expect("a stray non-UTF-8 byte must not abort the backlink scan"),
2180        );
2181        assert_eq!(
2182            got,
2183            vec![
2184                "sources/emails/2026/05/raw.md".to_string(),
2185                "wiki/people/clean.md".to_string(),
2186            ],
2187            "both the clean linker and the one with an invalid byte on the link \
2188             line are reported; the scan degrades, it does not fail"
2189        );
2190    }
2191
2192    // ── find_links_to_any (batch — the O(changed × store) fix) ─────────────────
2193
2194    /// The working-set validate's incoming-linker discovery runs through
2195    /// `find_links_to_any` over the WHOLE changed set in one pass. This pins the
2196    /// batch contract that makes that single-pass behavior correct: the result is
2197    /// the union of incoming linkers across every target, with per-target
2198    /// boundary correctness preserved (no alternation arm bleeds into a
2199    /// prefix-sharing sibling). If a regression reverts the batch finder to a
2200    /// per-object loop, the union below would still hold — but the boundary +
2201    /// union-equivalence assertions are what guard the *correctness* of folding N
2202    /// scans into one regex.
2203    #[test]
2204    fn find_links_to_any_returns_the_union_with_boundary_correctness() {
2205        let dir = empty_store();
2206        let root = dir.path();
2207
2208        // Two distinct targets, each with its own linker.
2209        write(
2210            root,
2211            "wiki/links-sarah.md",
2212            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
2213        );
2214        write(
2215            root,
2216            "wiki/links-acme.md",
2217            "---\ntype: wiki-page\nsummary: s\n---\nDeal with [[records/companies/acme|Acme]].\n",
2218        );
2219        // One file links to BOTH targets — must appear exactly once (deduped),
2220        // proving the per-file early-exit folds multiple-target hits into a
2221        // single result row rather than one row per matched target.
2222        write(
2223            root,
2224            "records/meetings/2026/05/m.md",
2225            "---\ntype: meeting\nsummary: s\n---\n[[records/contacts/sarah-chen]] re \
2226             [[records/companies/acme]]\n",
2227        );
2228        // A prefix-sharing sibling of a target: a link to `sarah-chen-jr` must NOT
2229        // be reported as a link to `sarah-chen` even though the alternation now
2230        // carries `sarah-chen` as one arm.
2231        write(
2232            root,
2233            "wiki/links-jr.md",
2234            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen-jr]]\n",
2235        );
2236        // A file that links to neither requested target.
2237        write(
2238            root,
2239            "wiki/unrelated.md",
2240            "---\ntype: wiki-page\nsummary: s\n---\n[[wiki/themes/spend]]\n",
2241        );
2242
2243        let store = open(&dir);
2244        let targets = vec![
2245            PathBuf::from("records/contacts/sarah-chen"),
2246            PathBuf::from("records/companies/acme"),
2247        ];
2248
2249        let got = rels(&store.find_links_to_any(&targets).unwrap());
2250        assert_eq!(
2251            got,
2252            vec![
2253                "records/meetings/2026/05/m.md".to_string(),
2254                "wiki/links-acme.md".to_string(),
2255                "wiki/links-sarah.md".to_string(),
2256            ],
2257            "batch finder must return the deduped union of linkers across all \
2258             targets, excluding the prefix-sibling and the unrelated file"
2259        );
2260
2261        // Equivalence: the batch result must equal the union of the per-target
2262        // single finder. This is the property the working-set path relies on
2263        // when it folds one-scan-per-object into one scan for the whole set.
2264        let mut union: std::collections::BTreeSet<PathBuf> = std::collections::BTreeSet::new();
2265        for t in &targets {
2266            for linker in store.find_links_to(t).unwrap() {
2267                union.insert(linker);
2268            }
2269        }
2270        assert_eq!(
2271            rels(&union.into_iter().collect::<Vec<_>>()),
2272            got,
2273            "find_links_to_any must equal the union of per-target find_links_to"
2274        );
2275    }
2276
2277    /// An empty target set must scan nothing and find nothing — and crucially
2278    /// must NOT compile to a match-everything empty regex (which would report
2279    /// every `.md` as a linker). This is the empty-working-set fast path the
2280    /// `validate` loop hits when nothing changed.
2281    #[test]
2282    fn find_links_to_any_empty_targets_matches_nothing() {
2283        let dir = empty_store();
2284        let root = dir.path();
2285        write(
2286            root,
2287            "wiki/a.md",
2288            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
2289        );
2290        let store = open(&dir);
2291
2292        assert!(
2293            store.find_links_to_any(&[]).unwrap().is_empty(),
2294            "no targets ⇒ no linkers (an empty pattern must not match every file)"
2295        );
2296        // A set of only empty/non-link targets is likewise a no-op, not a
2297        // match-everything.
2298        assert!(
2299            store
2300                .find_links_to_any(&[PathBuf::from(""), PathBuf::from("./")])
2301                .unwrap()
2302                .is_empty(),
2303            "targets that render to empty link text contribute no alternation arm"
2304        );
2305    }
2306
2307    // ── read_type_index ──────────────────────────────────────────────────────
2308
2309    #[test]
2310    fn read_type_index_parses_records_and_flattens_fields() {
2311        let dir = empty_store();
2312        let root = dir.path();
2313        let jsonl = "\
2314{\"path\":\"records/expenses/2026/05/a.md\",\"type\":\"expense\",\"summary\":\"lunch\",\"tags\":[\"meals\"],\"links\":[\"records/companies/acme\"],\"created\":\"2026-05-01T00:00:00Z\",\"updated\":\"2026-05-01T00:00:00Z\",\"vendor\":\"acme\",\"amount\":42}
2315{\"path\":\"records/expenses/2026/05/b.md\",\"type\":\"expense\",\"summary\":\"taxi\",\"created\":null,\"updated\":null,\"vendor\":\"yellow\"}
2316";
2317        let p = write(root, "records/expenses/index.jsonl", jsonl);
2318        let store = open(&dir);
2319        let recs = store.read_type_index(&store.abs_path(&p)).unwrap();
2320
2321        assert_eq!(recs.len(), 2);
2322        // Sorted by path asc.
2323        assert_eq!(recs[0].path, PathBuf::from("records/expenses/2026/05/a.md"));
2324        assert_eq!(recs[0].type_, "expense");
2325        assert_eq!(recs[0].summary, "lunch");
2326        assert_eq!(recs[0].tags, vec!["meals".to_string()]);
2327        assert_eq!(recs[0].links, vec!["records/companies/acme".to_string()]);
2328        assert!(recs[0].created.is_some());
2329        // Extra (non-typed) frontmatter flattens into `fields`.
2330        assert_eq!(
2331            recs[0].fields.get("vendor"),
2332            Some(&serde_json::json!("acme"))
2333        );
2334        assert_eq!(recs[0].fields.get("amount"), Some(&serde_json::json!(42)));
2335        // Defaults: missing tags/links → empty.
2336        assert!(recs[1].tags.is_empty());
2337        assert!(recs[1].links.is_empty());
2338    }
2339
2340    #[test]
2341    fn read_type_index_last_write_wins_and_skips_blanks() {
2342        let dir = empty_store();
2343        let root = dir.path();
2344        // Same path twice; the second line supersedes the first. A blank line
2345        // in between must be ignored, not error.
2346        let jsonl = "\
2347{\"path\":\"records/contacts/sarah.md\",\"type\":\"contact\",\"summary\":\"old\",\"created\":null,\"updated\":null}
2348
2349{\"path\":\"records/contacts/sarah.md\",\"type\":\"contact\",\"summary\":\"new\",\"created\":null,\"updated\":null}
2350";
2351        let p = write(root, "records/contacts/index.jsonl", jsonl);
2352        let store = open(&dir);
2353        let recs = store.read_type_index(&store.abs_path(&p)).unwrap();
2354        assert_eq!(recs.len(), 1, "duplicate path collapses to one record");
2355        assert_eq!(recs[0].summary, "new", "later line must win");
2356    }
2357
2358    #[test]
2359    fn read_type_index_errors_on_malformed_line() {
2360        let dir = empty_store();
2361        let root = dir.path();
2362        let p = write(root, "records/contacts/index.jsonl", "{not valid json}\n");
2363        let store = open(&dir);
2364        let err = store.read_type_index(&store.abs_path(&p)).unwrap_err();
2365        assert!(matches!(err, StoreError::BadTypeIndex { .. }));
2366    }
2367
2368    // ── find_by_type / find_by_where ─────────────────────────────────────────
2369
2370    fn jsonl_line(path: &str, type_: &str, summary: &str, extra: &str) -> String {
2371        format!(
2372            "{{\"path\":\"{path}\",\"type\":\"{type_}\",\"summary\":\"{summary}\",\"created\":null,\"updated\":null{extra}}}\n"
2373        )
2374    }
2375
2376    #[test]
2377    fn find_by_type_reads_canonical_folder_sidecar() {
2378        let dir = empty_store();
2379        let root = dir.path();
2380        // Canonical folder for `contact` is records/contacts.
2381        write(
2382            root,
2383            "records/contacts/index.jsonl",
2384            &(jsonl_line("records/contacts/sarah.md", "contact", "Sarah", "")
2385                + &jsonl_line("records/contacts/elena.md", "contact", "Elena", "")),
2386        );
2387        // A different type's sidecar must not leak into a contact query.
2388        write(
2389            root,
2390            "records/companies/index.jsonl",
2391            &jsonl_line("records/companies/acme.md", "company", "Acme", ""),
2392        );
2393        let store = open(&dir);
2394        let recs = store.find_by_type("contact").unwrap();
2395        let names: Vec<_> = recs.iter().map(|r| r.summary.clone()).collect();
2396        assert_eq!(names, vec!["Elena".to_string(), "Sarah".to_string()]); // path-sorted
2397        assert!(recs.iter().all(|r| r.type_ == "contact"));
2398    }
2399
2400    #[test]
2401    fn regression_find_by_type_includes_non_canonical_folder_when_canonical_exists() {
2402        // Regression for the silent-incompleteness bug: once the canonical
2403        // type-folder sidecar exists, `find_by_type` used to read ONLY that
2404        // sidecar and drop same-type records filed in a non-canonical folder in
2405        // the SAME layer — so the result flipped to incomplete the moment a
2406        // canonical record was added. The write path actively enables such a
2407        // layout (`records/clients/` for a `contact`, `wiki/<topic>/` for any
2408        // `wiki-page`), so this is a reachable, dedup-breaking omission.
2409        let dir = empty_store();
2410        let root = dir.path();
2411
2412        // CANONICAL folder sidecar exists (`records/contacts/` for `contact`),
2413        // which is exactly the condition that triggered the bug.
2414        write(
2415            root,
2416            "records/contacts/index.jsonl",
2417            &jsonl_line("records/contacts/sarah.md", "contact", "Sarah", ""),
2418        );
2419        // A `contact` filed in a NON-canonical folder within the same (Records)
2420        // layer. Pre-fix this was silently dropped because the canonical
2421        // sidecar existed; it must now come back.
2422        write(
2423            root,
2424            "records/clients/index.jsonl",
2425            &jsonl_line("records/clients/elena.md", "contact", "Elena", ""),
2426        );
2427        // A different type in the same layer must NOT leak in (proves the read
2428        // is type-filtered, not just a blind whole-layer dump).
2429        write(
2430            root,
2431            "records/companies/index.jsonl",
2432            &jsonl_line("records/companies/acme.md", "company", "Acme", ""),
2433        );
2434
2435        let store = open(&dir);
2436        let got: std::collections::BTreeSet<String> = store
2437            .find_by_type("contact")
2438            .unwrap()
2439            .into_iter()
2440            .map(|r| r.path.to_string_lossy().into_owned())
2441            .collect();
2442        assert_eq!(
2443            got,
2444            ["records/clients/elena.md", "records/contacts/sarah.md"]
2445                .into_iter()
2446                .map(String::from)
2447                .collect::<std::collections::BTreeSet<_>>(),
2448            "both the canonical-folder and the non-canonical-folder contact must \
2449             be returned; the company record must be excluded"
2450        );
2451    }
2452
2453    #[test]
2454    fn regression_find_by_type_wiki_page_spans_multiple_topic_folders() {
2455        // Regression for the scoped-backlinks variant of the same bug
2456        // (`graph backlinks --type wiki-page`): `wiki-page`'s canonical folder
2457        // is `wiki/topics`, but the SPEC files wiki pages under `wiki/<topic>/`
2458        // for ANY topic. With a `wiki/topics/index.jsonl` present, the old code
2459        // read only that folder and dropped pages in `wiki/people/`,
2460        // `wiki/projects/`, etc. — under-reporting dependents in a blast-radius
2461        // check. The whole-`wiki/`-layer read must surface all of them.
2462        let dir = empty_store();
2463        let root = dir.path();
2464        write(
2465            root,
2466            "wiki/topics/index.jsonl",
2467            &jsonl_line("wiki/topics/billing.md", "wiki-page", "Billing", ""),
2468        );
2469        write(
2470            root,
2471            "wiki/people/index.jsonl",
2472            &jsonl_line("wiki/people/sarah-chen.md", "wiki-page", "Sarah Chen", ""),
2473        );
2474        write(
2475            root,
2476            "wiki/projects/index.jsonl",
2477            &jsonl_line("wiki/projects/atlas.md", "wiki-page", "Atlas", ""),
2478        );
2479
2480        let store = open(&dir);
2481        let got: std::collections::BTreeSet<String> = store
2482            .find_by_type("wiki-page")
2483            .unwrap()
2484            .into_iter()
2485            .map(|r| r.path.to_string_lossy().into_owned())
2486            .collect();
2487        assert_eq!(
2488            got,
2489            [
2490                "wiki/people/sarah-chen.md",
2491                "wiki/projects/atlas.md",
2492                "wiki/topics/billing.md",
2493            ]
2494            .into_iter()
2495            .map(String::from)
2496            .collect::<std::collections::BTreeSet<_>>(),
2497            "a wiki-page query must return pages from every topic folder, not \
2498             just the canonical wiki/topics/"
2499        );
2500    }
2501
2502    #[test]
2503    fn find_by_type_canonical_absent_falls_back_within_the_layer_only() {
2504        let dir = empty_store();
2505        let root = dir.path();
2506        // A custom `proposal` record filed under a non-canonical folder NAME
2507        // (the natural plural `records/proposals/`) inside the records layer.
2508        // `default_type_folder("proposal")` = `records/proposal` (bare type, no
2509        // pluralization guess), so the canonical sidecar does not exist and
2510        // `find_by_type` falls back. The fallback is bounded to the type's
2511        // layer (records), so this record — same layer, non-canonical folder —
2512        // is still found: completeness within the layer holds.
2513        write(
2514            root,
2515            "records/proposals/index.jsonl",
2516            &jsonl_line("records/proposals/p1.md", "proposal", "Q3 proposal", ""),
2517        );
2518        // A DECOY of the SAME type sitting in a DIFFERENT layer (sources/). The
2519        // old whole-store fallback read every sidecar in the store and would
2520        // have leaked this into the result; the layer-bounded fallback must not.
2521        // It also pins that the fallback is O(entities-in-layer), never O(store).
2522        write(
2523            root,
2524            "sources/proposals/index.jsonl",
2525            &jsonl_line(
2526                "sources/proposals/leak.md",
2527                "proposal",
2528                "cross-layer decoy",
2529                "",
2530            ),
2531        );
2532        let store = open(&dir);
2533        let recs = store.find_by_type("proposal").unwrap();
2534        assert_eq!(
2535            recs.len(),
2536            1,
2537            "only the records-layer proposal, not the sources decoy"
2538        );
2539        assert_eq!(recs[0].summary, "Q3 proposal");
2540        assert_eq!(recs[0].path, PathBuf::from("records/proposals/p1.md"));
2541    }
2542
2543    #[test]
2544    fn find_by_type_canonical_absent_does_not_read_other_layers() {
2545        let dir = empty_store();
2546        let root = dir.path();
2547        // `email`'s canonical folder is `sources/emails` (layer Sources). No
2548        // sidecar there yet, so `find_by_type("email")` falls back — but only
2549        // within the Sources layer. A populated sidecar in the Records layer
2550        // must never be touched: the fallback is layer-bounded, not store-wide.
2551        // Under the old `read_all_type_indexes_in(None)` fallback this records
2552        // sidecar would have been read and filtered (wasted O(store) I/O); now
2553        // it is outside the walk root entirely.
2554        write(
2555            root,
2556            "records/contacts/index.jsonl",
2557            &jsonl_line("records/contacts/sarah.md", "contact", "Sarah", ""),
2558        );
2559        let store = open(&dir);
2560        // No email anywhere ⇒ empty, and the records layer was not in scope.
2561        assert!(store.find_by_type("email").unwrap().is_empty());
2562    }
2563
2564    #[test]
2565    fn find_by_where_matches_typed_columns_and_flat_fields() {
2566        let dir = empty_store();
2567        let root = dir.path();
2568        write(
2569            root,
2570            "records/expenses/index.jsonl",
2571            &(jsonl_line(
2572                "records/expenses/a.md",
2573                "expense",
2574                "lunch",
2575                ",\"vendor\":\"acme\",\"tags\":[\"meals\"]",
2576            ) + &jsonl_line(
2577                "records/expenses/b.md",
2578                "expense",
2579                "taxi",
2580                ",\"vendor\":\"yellow\"",
2581            )),
2582        );
2583        write(
2584            root,
2585            "records/contacts/index.jsonl",
2586            &jsonl_line(
2587                "records/contacts/sarah.md",
2588                "contact",
2589                "Sarah",
2590                ",\"tags\":[\"customer\"]",
2591            ),
2592        );
2593        let store = open(&dir);
2594
2595        // Flat field in `fields`.
2596        let by_vendor = store.find_by_where("vendor", "acme").unwrap();
2597        assert_eq!(by_vendor.len(), 1);
2598        assert_eq!(by_vendor[0].path, PathBuf::from("records/expenses/a.md"));
2599
2600        // Typed column: type (spans both expense records).
2601        assert_eq!(store.find_by_where("type", "expense").unwrap().len(), 2);
2602
2603        // Typed list column: tags membership.
2604        let customers = store.find_by_where("tags", "customer").unwrap();
2605        assert_eq!(customers.len(), 1);
2606        assert_eq!(
2607            customers[0].path,
2608            PathBuf::from("records/contacts/sarah.md")
2609        );
2610
2611        // No match → empty.
2612        assert!(store.find_by_where("vendor", "nobody").unwrap().is_empty());
2613    }
2614
2615    #[test]
2616    fn find_by_where_matches_timestamps_across_rfc3339_spellings() {
2617        let dir = empty_store();
2618        let root = dir.path();
2619        // db.md files most commonly carry the `Z` UTC spelling. The index.jsonl
2620        // serialized from such a file preserves it verbatim.
2621        write(
2622            root,
2623            "records/meetings/index.jsonl",
2624            "{\"path\":\"records/meetings/kickoff.md\",\"type\":\"meeting\",\
2625\"summary\":\"kickoff\",\"created\":\"2026-05-01T00:00:00Z\",\
2626\"updated\":\"2026-05-02T09:30:00-07:00\"}\n",
2627        );
2628        let store = open(&dir);
2629
2630        // The exact value an agent reads out of the file (`Z` form) must match.
2631        let by_z = store
2632            .find_by_where("created", "2026-05-01T00:00:00Z")
2633            .unwrap();
2634        assert_eq!(by_z.len(), 1);
2635        assert_eq!(by_z[0].path, PathBuf::from("records/meetings/kickoff.md"));
2636
2637        // The equivalent explicit-offset spelling of the same instant matches too.
2638        assert_eq!(
2639            store
2640                .find_by_where("created", "2026-05-01T00:00:00+00:00")
2641                .unwrap()
2642                .len(),
2643            1
2644        );
2645
2646        // A non-UTC stored value matches both its own offset spelling and the
2647        // same instant expressed as `Z` (instant comparison, not string compare).
2648        assert_eq!(
2649            store
2650                .find_by_where("updated", "2026-05-02T09:30:00-07:00")
2651                .unwrap()
2652                .len(),
2653            1
2654        );
2655        assert_eq!(
2656            store
2657                .find_by_where("updated", "2026-05-02T16:30:00Z")
2658                .unwrap()
2659                .len(),
2660            1
2661        );
2662
2663        // A different instant does not match.
2664        assert!(store
2665            .find_by_where("created", "2026-05-01T00:00:01Z")
2666            .unwrap()
2667            .is_empty());
2668        // A non-RFC3339 query value never matches a real timestamp.
2669        assert!(store
2670            .find_by_where("created", "2026-05-01")
2671            .unwrap()
2672            .is_empty());
2673    }
2674
2675    #[test]
2676    fn find_by_where_in_layer_reads_only_that_layers_sidecars() {
2677        // The O(entities-in-layer) contract: a layer-scoped where read must walk
2678        // ONLY the named layer's subtree. Proven structurally — a *malformed*
2679        // sidecar in another layer would make `read_type_index` error if it were
2680        // read, so a scoped read that succeeds (and excludes that record) is
2681        // proof the other layer's I/O never happened.
2682        let dir = empty_store();
2683        let root = dir.path();
2684        write(
2685            root,
2686            "records/companies/index.jsonl",
2687            &jsonl_line(
2688                "records/companies/acme.md",
2689                "company",
2690                "Acme",
2691                ",\"domain\":\"acme.com\"",
2692            ),
2693        );
2694        // Same field/value in the sources layer — but the sidecar is corrupt.
2695        write(
2696            root,
2697            "sources/emails/index.jsonl",
2698            "{ this is not valid json and would error if read }\n",
2699        );
2700        let store = open(&dir);
2701
2702        // Scoped to records: the corrupt sources sidecar is out of scope, so the
2703        // read succeeds and returns only the records-layer match.
2704        let in_records = store
2705            .find_by_where_in("domain", "acme.com", Some(Layer::Records))
2706            .expect("a records-scoped read must not touch the sources sidecar");
2707        assert_eq!(
2708            rels(
2709                &in_records
2710                    .iter()
2711                    .map(|r| r.path.clone())
2712                    .collect::<Vec<_>>()
2713            ),
2714            vec!["records/companies/acme.md".to_string()]
2715        );
2716
2717        // The store-wide read DOES reach the corrupt sidecar and surfaces it as
2718        // a parse error — confirming the corrupt file is genuinely in the tree
2719        // and that only the layer scope spares it.
2720        let store_wide = store.find_by_where("domain", "acme.com");
2721        assert!(
2722            matches!(store_wide, Err(StoreError::BadTypeIndex { .. })),
2723            "unscoped read walks every layer and hits the corrupt sidecar"
2724        );
2725
2726        // Scoping to the layer that holds only the corrupt sidecar still errors
2727        // (the scope includes it), proving the scope is a real subtree bound and
2728        // not a silent "skip anything that fails".
2729        let in_sources = store.find_by_where_in("domain", "acme.com", Some(Layer::Sources));
2730        assert!(matches!(in_sources, Err(StoreError::BadTypeIndex { .. })));
2731    }
2732
2733    #[test]
2734    fn find_by_where_in_missing_layer_is_empty_not_an_error() {
2735        // A layer-scoped read over a layer folder that does not exist yet must
2736        // return empty (mirrors `walk_layer`'s missing-dir guard), never a walk
2737        // error from `ignore` over a nonexistent path.
2738        let dir = empty_store();
2739        let root = dir.path();
2740        write(
2741            root,
2742            "records/contacts/index.jsonl",
2743            &jsonl_line(
2744                "records/contacts/sarah.md",
2745                "contact",
2746                "Sarah",
2747                ",\"city\":\"denver\"",
2748            ),
2749        );
2750        let store = open(&dir);
2751
2752        // `wiki/` was never created.
2753        let in_wiki = store
2754            .find_by_where_in("city", "denver", Some(Layer::Wiki))
2755            .expect("missing layer subtree is empty, not an error");
2756        assert!(in_wiki.is_empty());
2757
2758        // Same query scoped to the layer that has the record still finds it.
2759        let in_records = store
2760            .find_by_where_in("city", "denver", Some(Layer::Records))
2761            .unwrap();
2762        assert_eq!(in_records.len(), 1);
2763    }
2764
2765    // ── abs_path / rel_path ──────────────────────────────────────────────────
2766
2767    #[test]
2768    fn abs_and_rel_path_roundtrip() {
2769        let dir = empty_store();
2770        let store = open(&dir);
2771        let rel = Path::new("records/contacts/sarah.md");
2772        let abs = store.abs_path(rel);
2773        assert_eq!(abs, dir.path().join(rel));
2774        assert_eq!(store.rel_path(&abs).as_deref(), Some(rel));
2775
2776        // An absolute path is passed through unchanged by abs_path.
2777        assert_eq!(store.abs_path(&abs), abs);
2778
2779        // A path outside the store has no store-relative form.
2780        assert_eq!(store.rel_path(Path::new("/somewhere/else.md")), None);
2781    }
2782
2783    // ── infer_type_from_path (inverse of default_type_folder) ────────────────
2784
2785    #[test]
2786    fn infer_type_maps_every_recognized_folder_back_to_its_type() {
2787        let cases = [
2788            ("sources/emails/x.md", "email"),
2789            ("sources/transcripts/x.md", "transcript"),
2790            ("sources/docs/x.md", "pdf-source"),
2791            ("records/contacts/x.md", "contact"),
2792            ("records/companies/x.md", "company"),
2793            ("records/expenses/x.md", "expense"),
2794            ("records/meetings/x.md", "meeting"),
2795            ("records/decisions/x.md", "decision"),
2796            ("records/invoices/x.md", "invoice"),
2797            // Any wiki sub-folder infers `wiki-page` regardless of the topic name.
2798            ("wiki/topics/x.md", "wiki-page"),
2799            ("wiki/pricing/x.md", "wiki-page"),
2800        ];
2801        for (path, expected) in cases {
2802            assert_eq!(
2803                infer_type_from_path(Path::new(path)).as_deref(),
2804                Some(expected),
2805                "path {path} should infer type {expected}"
2806            );
2807        }
2808    }
2809
2810    #[test]
2811    fn infer_type_round_trips_with_default_type_folder() {
2812        // The canonical invariant: inference is the inverse of the forward map.
2813        // Every recognized type, routed through `default_type_folder` and then
2814        // back through `infer_type_from_path`, must return the original type.
2815        // `wiki-page` is the one many-to-one case (every topic folder maps back
2816        // to `wiki-page`), so its forward folder still round-trips.
2817        let recognized = [
2818            "email",
2819            "transcript",
2820            "pdf-source",
2821            "contact",
2822            "company",
2823            "expense",
2824            "meeting",
2825            "decision",
2826            "invoice",
2827            "wiki-page",
2828        ];
2829        for type_ in recognized {
2830            let folder = default_type_folder(type_);
2831            let file = folder.join("x.md");
2832            assert_eq!(
2833                infer_type_from_path(&file).as_deref(),
2834                Some(type_),
2835                "recognized type {type_} (folder {folder:?}) must round-trip"
2836            );
2837        }
2838    }
2839
2840    #[test]
2841    fn infer_type_round_trips_custom_types_verbatim_no_singularization() {
2842        // Regression guard for the CLI/core divergence: `default_type_folder`'s
2843        // unrecognized fallback is the BARE type name (`task → records/task`,
2844        // `tasks → records/tasks`). Inference must NOT singularize, or a custom
2845        // type would not round-trip (e.g. `records/tasks` → `task` would clash
2846        // with `default_type_folder("task") → records/task`).
2847        for custom in ["task", "tasks", "playbook", "process", "okrs", "ticket"] {
2848            let folder = default_type_folder(custom);
2849            assert_eq!(folder, PathBuf::from("records").join(custom));
2850            let file = folder.join("x.md");
2851            assert_eq!(
2852                infer_type_from_path(&file).as_deref(),
2853                Some(custom),
2854                "custom type {custom} must round-trip verbatim (no singularization)"
2855            );
2856        }
2857
2858        // The specific case named in the finding: a plural custom folder keeps
2859        // its trailing `s`; it is NOT singularized to `task`.
2860        assert_eq!(
2861            infer_type_from_path(Path::new("records/tasks/x.md")).as_deref(),
2862            Some("tasks"),
2863            "records/tasks must infer `tasks`, not `task`"
2864        );
2865    }
2866
2867    #[test]
2868    fn infer_type_requires_three_component_layer_folder_file_shape() {
2869        // Fewer than 3 components: a file directly under a layer has no
2870        // type-folder, so inference yields None (matches the old CLI contract).
2871        assert_eq!(infer_type_from_path(Path::new("records/x.md")), None);
2872        assert_eq!(infer_type_from_path(Path::new("sources/x.md")), None);
2873        assert_eq!(infer_type_from_path(Path::new("wiki/x.md")), None);
2874        assert_eq!(infer_type_from_path(Path::new("x.md")), None);
2875        // Unknown leading layer is never inferred.
2876        assert_eq!(infer_type_from_path(Path::new("foo/bar/x.md")), None);
2877        // Deeper paths still infer from the first type-folder segment (e.g. a
2878        // sharded record under records/expenses/2026/05/x.md).
2879        assert_eq!(
2880            infer_type_from_path(Path::new("records/expenses/2026/05/x.md")).as_deref(),
2881            Some("expense"),
2882        );
2883    }
2884
2885    // ── ensure_path_within_store (containment) ───────────────────────────────
2886
2887    #[test]
2888    fn ensure_path_within_store_accepts_in_store_and_rejects_escape() {
2889        let dir = tempdir().unwrap();
2890        let root = dir.path();
2891        fs::create_dir_all(root.join("records/contacts")).unwrap();
2892        fs::write(root.join("records/contacts/sarah.md"), "x").unwrap();
2893
2894        // An existing in-store file resolves and is accepted.
2895        let inside = root.join("records/contacts/sarah.md");
2896        let got = ensure_path_within_store(root, &inside).expect("in-store path accepted");
2897        // Canonical, but still under the (canonical) root.
2898        assert!(got.starts_with(root.canonicalize().unwrap()));
2899
2900        // A not-yet-existing in-store leaf is accepted (rename destination).
2901        let new_leaf = root.join("records/contacts/sarah-chen.md");
2902        assert!(
2903            ensure_path_within_store(root, &new_leaf).is_ok(),
2904            "a non-existent in-store leaf must be accepted"
2905        );
2906
2907        // A `..`-escaping path is rejected even though its prefix exists.
2908        let escape = root.join("records/contacts/../../outside/secret.md");
2909        assert!(
2910            ensure_path_within_store(root, &escape).is_err(),
2911            "a `..`-escaping path must be rejected"
2912        );
2913    }
2914
2915    #[test]
2916    fn ensure_path_within_store_rejects_symlink_escape() {
2917        let dir = tempdir().unwrap();
2918        let root = dir.path().join("store");
2919        fs::create_dir_all(&root).unwrap();
2920        let outside_dir = dir.path().join("outside");
2921        fs::create_dir_all(&outside_dir).unwrap();
2922        let secret = outside_dir.join("secret.md");
2923        fs::write(&secret, "TOPSECRET").unwrap();
2924
2925        // A symlink inside the store that points OUTSIDE it must be rejected:
2926        // resolving the symlink lands outside the canonical root.
2927        #[cfg(unix)]
2928        {
2929            use std::os::unix::fs::symlink;
2930            let link = root.join("escape.md");
2931            symlink(&secret, &link).unwrap();
2932            assert!(
2933                ensure_path_within_store(&root, &link).is_err(),
2934                "a symlink resolving outside the store must be rejected"
2935            );
2936        }
2937    }
2938
2939    // ── shared link-edge notion (fence / whitespace / case) ──────────────────
2940
2941    #[test]
2942    fn extract_edge_targets_trims_inner_whitespace() {
2943        // Padded `[[ x ]]` is the same edge as `[[x]]`.
2944        assert_eq!(
2945            extract_edge_targets("See [[ records/contacts/sarah ]] today."),
2946            vec!["records/contacts/sarah".to_string()]
2947        );
2948    }
2949
2950    #[test]
2951    fn extract_edge_targets_skips_fenced_code_blocks() {
2952        // A `[[...]]` inside a ``` fence is a doc example, NOT an edge — matching
2953        // validate's body extractor.
2954        let body = "\
2955Real [[records/contacts/sarah]] link.
2956
2957```markdown
2958[[records/contacts/ghost-example]] is how you link.
2959```
2960
2961After fence [[records/companies/acme]].
2962";
2963        let got = extract_edge_targets(body);
2964        assert_eq!(
2965            got,
2966            vec![
2967                "records/contacts/sarah".to_string(),
2968                "records/companies/acme".to_string(),
2969            ],
2970            "fenced example link must not be an edge"
2971        );
2972    }
2973
2974    #[test]
2975    fn extract_edge_targets_handles_nested_indented_and_long_run_fences() {
2976        // Regression for the naive `starts_with("```")/("~~~")` toggle: a fence
2977        // nested inside another, an over-indented (>3 space) marker, and a
2978        // long-run fence wrapping a shorter inner one must all leave the block's
2979        // links un-extracted (validate treats the whole block as opaque). The
2980        // (char, run-length) tracker keys on the OPENING fence and closes only on
2981        // a matching char with run ≥ the opener.
2982
2983        // (a) A ```` ```` ````-run block (run 4) wrapping a ``` example (run 3).
2984        // The inner ``` does NOT close the outer run-4 fence, so both `[[...]]`
2985        // inside stay fenced.
2986        let nested = "\
2987Doc:
2988
2989````
2990```
2991[[records/contacts/bob]]
2992```
2993still fenced [[records/contacts/bob]]
2994````
2995
2996Real [[records/companies/acme]].
2997";
2998        assert_eq!(
2999            extract_edge_targets(nested),
3000            vec!["records/companies/acme".to_string()],
3001            "a nested ``` inside a ````-run fence must not leak the fenced links"
3002        );
3003
3004        // (b) A `~~~` block containing a ``` line (the standard way to document a
3005        // backtick fence). The inner backtick line must not flip the state.
3006        let tilde_wraps_backtick = "\
3007~~~
3008```
3009[[records/contacts/ghost]]
3010```
3011~~~
3012
3013After [[records/companies/acme]].
3014";
3015        assert_eq!(
3016            extract_edge_targets(tilde_wraps_backtick),
3017            vec!["records/companies/acme".to_string()],
3018            "a ``` line inside a ~~~ block must not invert the fence state"
3019        );
3020
3021        // (c) An over-indented ```` ``` ```` (4 spaces) is NOT a fence; the link
3022        // on the next line is live.
3023        let over_indented = "    ```\nLive [[records/contacts/sarah]].\n";
3024        assert_eq!(
3025            extract_edge_targets(over_indented),
3026            vec!["records/contacts/sarah".to_string()],
3027            "a >3-space-indented ``` is not a fence opener"
3028        );
3029    }
3030
3031    #[test]
3032    fn canonical_link_target_strips_md_dotslash_and_trims() {
3033        assert_eq!(canonical_link_target("  records/x.md  "), "records/x");
3034        assert_eq!(canonical_link_target("./wiki/y"), "wiki/y");
3035        assert_eq!(canonical_link_target("/records/z"), "records/z");
3036    }
3037
3038    #[test]
3039    fn link_edge_key_folds_case_only_on_case_insensitive_fs() {
3040        let a = link_edge_key("records/contacts/Sarah-Chen");
3041        let b = link_edge_key("records/contacts/sarah-chen");
3042        if fs_is_case_insensitive() {
3043            assert_eq!(a, b, "case-insensitive FS must fold the key");
3044        } else {
3045            assert_ne!(a, b, "case-sensitive FS must keep the key case-exact");
3046        }
3047    }
3048
3049    // ── walk follows symlinked content ───────────────────────────────────────
3050
3051    #[cfg(unix)]
3052    #[test]
3053    fn walk_includes_symlinked_content_file_and_symlinked_folder() {
3054        use std::os::unix::fs::symlink;
3055        let dir = empty_store();
3056        let root = dir.path();
3057        // A regular file (control).
3058        write(
3059            root,
3060            "records/contacts/sarah.md",
3061            &content_md("2026-05-01T00:00:00Z"),
3062        );
3063        // A symlinked .md content file inside a real folder.
3064        let external_file = root.join("external-elena.md");
3065        fs::write(&external_file, content_md("2026-05-02T00:00:00Z")).unwrap();
3066        symlink(&external_file, root.join("records/contacts/elena.md")).unwrap();
3067        // A symlinked type folder.
3068        let external_dir = dir.path().join("external-companies");
3069        fs::create_dir_all(&external_dir).unwrap();
3070        fs::write(
3071            external_dir.join("acme.md"),
3072            content_md("2026-05-03T00:00:00Z"),
3073        )
3074        .unwrap();
3075        symlink(&external_dir, root.join("records/companies")).unwrap();
3076
3077        let store = open(&dir);
3078        let got = rels(&store.walk().unwrap());
3079        assert!(
3080            got.contains(&"records/contacts/elena.md".to_string()),
3081            "a symlinked content file must be walked: {got:?}"
3082        );
3083        assert!(
3084            got.contains(&"records/companies/acme.md".to_string()),
3085            "a file inside a symlinked type folder must be walked: {got:?}"
3086        );
3087    }
3088
3089    // ── find_links_to: padded / fenced / case ────────────────────────────────
3090
3091    #[test]
3092    fn find_links_to_matches_whitespace_padded_link() {
3093        let dir = empty_store();
3094        let root = dir.path();
3095        write(
3096            root,
3097            "wiki/people/a.md",
3098            "---\ntype: wiki-page\nsummary: s\n---\nSee [[ records/contacts/sarah ]] today.\n",
3099        );
3100        let store = open(&dir);
3101        let got = rels(
3102            &store
3103                .find_links_to(Path::new("records/contacts/sarah"))
3104                .unwrap(),
3105        );
3106        assert_eq!(
3107            got,
3108            vec!["wiki/people/a.md".to_string()],
3109            "a padded `[[ x ]]` link must be found as a backward edge, matching forwardlinks"
3110        );
3111    }
3112
3113    #[test]
3114    fn find_links_to_ignores_fenced_example_link() {
3115        let dir = empty_store();
3116        let root = dir.path();
3117        write(
3118            root,
3119            "wiki/topics/howto.md",
3120            "---\ntype: wiki-page\nsummary: s\n---\n```markdown\n[[records/contacts/sarah]]\n```\n",
3121        );
3122        let store = open(&dir);
3123        let got = store
3124            .find_links_to(Path::new("records/contacts/sarah"))
3125            .unwrap();
3126        assert!(
3127            got.is_empty(),
3128            "a `[[...]]` only inside a fenced code block is not a backward edge: {got:?}"
3129        );
3130    }
3131
3132    #[cfg(unix)]
3133    #[test]
3134    fn find_links_to_matches_case_variant_on_case_insensitive_fs() {
3135        // Only meaningful on a case-insensitive filesystem; on a case-sensitive
3136        // one the case-variant link is genuinely a different target.
3137        if !fs_is_case_insensitive() {
3138            return;
3139        }
3140        let dir = empty_store();
3141        let root = dir.path();
3142        write(
3143            root,
3144            "wiki/people/bio.md",
3145            "---\ntype: wiki-page\nsummary: s\n---\nSee [[records/contacts/Sarah-Chen]].\n",
3146        );
3147        let store = open(&dir);
3148        let got = rels(
3149            &store
3150                .find_links_to(Path::new("records/contacts/sarah-chen"))
3151                .unwrap(),
3152        );
3153        assert_eq!(
3154            got,
3155            vec!["wiki/people/bio.md".to_string()],
3156            "a case-variant link must be found on a case-insensitive filesystem"
3157        );
3158    }
3159}