Skip to main content

dbmd_core/
store.rs

1//! `store` — walk, locate, and shard a db.md store.
2//!
3//! A db.md store is one directory marked by an uppercase `DB.md` at its root.
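//! [`Store::open_strict`] is the gate normal store-walking subcommands go
//! through; [`Store::open`] is its lenient, validation-oriented counterpart
//! that tolerates a damaged config. For both, a missing `DB.md` is the
//! [`NotAStore`] error (`NOT_A_STORE`). The toolkit never guesses a store root.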
7//!
8//! Scale discipline lives here: [`Store::walk`] and the layer/type-folder
9//! walks are **SWEEP** primitives used only by `validate --all`,
10//! `index rebuild`, and `stats`. The interactive loop instead uses
11//! [`Store::find_links_to`] / [`Store::find_links_to_any`] (embedded ripgrep,
12//! presence-only) and the `index.jsonl` sidecar readers
13//! ([`Store::find_by_type`] / [`Store::find_by_where`] /
14//! [`Store::read_type_index`]) — never a whole-store parse. The batch
15//! [`Store::find_links_to_any`] is what keeps the working-set validate's
16//! incoming-linker discovery a single store scan rather than one scan per
17//! changed object.
18
19use std::collections::BTreeMap;
20use std::path::{Path, PathBuf};
21
22use chrono::{DateTime, Datelike, FixedOffset};
23use grep::regex::RegexMatcher;
24use grep::searcher::sinks::UTF8;
25use grep::searcher::Searcher;
26use ignore::WalkBuilder;
27
28use crate::index::IndexRecord;
29use crate::parser::{parse_db_md, Config, Frontmatter};
30
/// Basenames that are never content files: the config marker (`DB.md`) and
/// the two curator-maintained catalogs (`index.md`, `log.md`). The store walks
/// skip these so a SWEEP over the content layers never mistakes a catalog for
/// a record.
///
/// NOTE(review): presumably consumed by the `is_non_content_basename` helper
/// that the content walk calls — confirm against that helper.
const NON_CONTENT_BASENAMES: [&str; 3] = ["DB.md", "index.md", "log.md"];
35
/// Basename of the per-type-folder sidecar: the complete machine-twin index
/// that backs every structured read. Joined onto a type-folder to locate its
/// sidecar, and matched against file names when enumerating sidecars.
const TYPE_INDEX_FILE: &str = "index.jsonl";
38
/// Returned when a path is opened as a store but has no `DB.md` at its root.
/// Surfaced as the structured code `NOT_A_STORE` with a non-zero exit.
///
/// [`Store::open`] returns this type directly; [`Store::open_strict`] converts
/// it into the crate-level error.
#[derive(Debug, thiserror::Error)]
#[error("not a db.md store: {path} has no DB.md")]
pub struct NotAStore {
    /// The path that was inspected.
    pub path: PathBuf,
}
47
/// Errors from store-level operations (walk, locate, shard, sidecar read).
///
/// Every fallible [`Store`] method other than the two `open*` constructors
/// reports failure through this enum.
#[derive(Debug, thiserror::Error)]
pub enum StoreError {
    /// A sidecar `index.jsonl` could not be read or parsed. Raised by
    /// [`Store::read_type_index`] both for the file read itself and for any
    /// non-blank line that does not deserialize (the message then carries the
    /// 1-based line number).
    #[error("failed to read type index {path}: {message}")]
    BadTypeIndex {
        /// The sidecar file.
        path: PathBuf,
        /// What went wrong.
        message: String,
    },

    /// A required date field for sharding was absent or unparseable, and there
    /// was no usable fallback. Raised by [`Store::shard_path_in`] for a
    /// sharding type whose frontmatter yields no `<YYYY>/<MM>` segment.
    #[error("cannot compute shard path for {file}: no usable date field")]
    NoShardDate {
        /// The file being placed.
        file: PathBuf,
    },

    /// An embedded-ripgrep scan failed to start or run. Also used for errors
    /// yielded by the directory walkers and for an invalid backlink pattern.
    #[error("search failed under {root}: {message}")]
    Search {
        /// The root the scan ran under.
        root: PathBuf,
        /// What went wrong.
        message: String,
    },

    /// An underlying I/O failure. Convertible from [`std::io::Error`] via
    /// `From`, so `?` on an I/O result lifts into this variant.
    #[error(transparent)]
    Io(#[from] std::io::Error),
}
81
/// One of the three canonical layers of a db.md store.
///
/// The derived `Ord`/`PartialOrd` follow declaration order
/// (`Sources` < `Records` < `Wiki`); sibling modules rely on that when they
/// key `BTreeMap`s on `Layer` (e.g. `stats::Stats::files_per_layer`), so the
/// variant order must not be rearranged.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Layer {
    /// `sources/` — raw evidence; immutable; date-sharded at scale.
    Sources,
    /// `records/` — atomic typed data; entity types flat, event types sharded.
    Records,
    /// `wiki/` — curator-synthesized narrative; flat.
    Wiki,
}

impl Layer {
    /// On-disk folder name of the layer: `"sources"`, `"records"` or `"wiki"`.
    pub fn dir_name(self) -> &'static str {
        match self {
            Self::Sources => "sources",
            Self::Records => "records",
            Self::Wiki => "wiki",
        }
    }

    /// Inverse of [`Layer::dir_name`]: the layer whose folder name is exactly
    /// `name` (case-sensitive), or `None` for any other string.
    pub fn from_dir_name(name: &str) -> Option<Self> {
        // Derive the mapping from `all()` + `dir_name()` so the two directions
        // cannot drift apart.
        Self::all()
            .iter()
            .copied()
            .find(|layer| layer.dir_name() == name)
    }

    /// All layers in canonical (declaration) order.
    pub fn all() -> [Layer; 3] {
        [Self::Sources, Self::Records, Self::Wiki]
    }
}
123
/// An opened db.md store: its root path plus the parsed `DB.md` [`Config`].
///
/// Construct via [`Store::open_strict`] (requires a readable, parseable
/// `DB.md`) or [`Store::open`] (lenient: falls back to the default config when
/// `DB.md` cannot be read or parsed). Both confirm the `DB.md` marker first,
/// so downstream code can assume a real store.
#[derive(Debug, Clone)]
pub struct Store {
    /// The store root (the directory containing `DB.md`).
    pub root: PathBuf,
    /// The parsed `DB.md` config (agent instructions, policies, schemas).
    pub config: Config,
}
135
136impl Store {
137    /// True if `path` is a db.md store root: an uppercase `DB.md` file exists
138    /// at `path`. On case-sensitive filesystems a lowercase `db.md` must NOT
139    /// count (the lowercase name refers to the project/spec, not the marker).
140    pub fn is_db_md_store(path: &Path) -> bool {
141        // Read the directory and match the *stored* filename byte-for-byte.
142        // `path.join("DB.md").exists()` would lie on a case-insensitive
143        // filesystem (macOS default), where a lowercase `db.md` answers a
144        // `DB.md` probe. `read_dir` returns the real on-disk name, so the
145        // exact-match check is correct on both case-sensitive (Linux) and
146        // case-insensitive filesystems.
147        let entries = match std::fs::read_dir(path) {
148            Ok(entries) => entries,
149            Err(_) => return false,
150        };
151        for entry in entries.flatten() {
152            if entry.file_name() == "DB.md" {
153                // A directory literally named `DB.md` is not the marker.
154                match entry.file_type() {
155                    Ok(ft) if ft.is_dir() => return false,
156                    Ok(_) => return true,
157                    Err(_) => return false,
158                }
159            }
160        }
161        false
162    }
163
164    /// Open `path` as a db.md store and require `DB.md` to be readable and
165    /// parseable. Normal commands should enter through this strict gate so a
166    /// damaged config cannot silently disable schema or policy rules.
167    pub fn open_strict(path: &Path) -> crate::Result<Store> {
168        if !Store::is_db_md_store(path) {
169            return Err(NotAStore {
170                path: path.to_path_buf(),
171            }
172            .into());
173        }
174        let db_md = path.join("DB.md");
175        let text = std::fs::read_to_string(&db_md)?;
176        let config = parse_db_md(&text, &db_md)?;
177        Ok(Store {
178            root: path.to_path_buf(),
179            config,
180        })
181    }
182
183    /// Open `path` as a db.md store: confirm the `DB.md` marker (else
184    /// [`NotAStore`]) and parse the `DB.md` config when possible. This is the
185    /// lenient validation-oriented open path: a damaged `DB.md` still marks the
186    /// directory as a store so `dbmd validate` can report the config error as an
187    /// issue. Normal CLI commands should use [`Store::open_strict`] instead.
188    pub fn open(path: &Path) -> Result<Store, NotAStore> {
189        if !Store::is_db_md_store(path) {
190            return Err(NotAStore {
191                path: path.to_path_buf(),
192            });
193        }
194        let db_md = path.join("DB.md");
195        // The marker exists; parse its config. A read or parse failure leaves
196        // the store openable with default config rather than masquerading as
197        // NOT_A_STORE — the marker is present, so this *is* a store; a damaged
198        // DB.md is `dbmd validate`'s job to report, not `open`'s.
199        let config = match std::fs::read_to_string(&db_md) {
200            Ok(text) => parse_db_md(&text, &db_md).unwrap_or_default(),
201            Err(_) => Config::default(),
202        };
203        Ok(Store {
204            root: path.to_path_buf(),
205            config,
206        })
207    }
208
209    /// **SWEEP.** Recursively iterate every `.md` content file across
210    /// `sources/`, `records/`, and `wiki/`, skipping hidden dirs and `log/`.
211    /// Used only by `validate --all`, `index rebuild`, and `stats` — never on
212    /// the interactive loop.
213    pub fn walk(&self) -> Result<Vec<PathBuf>, StoreError> {
214        // Only the three content layers — never root meta files (`DB.md`,
215        // `index.md`, `log.md`) and never `log/`, which live at root and are
216        // outside every layer dir.
217        let mut out = Vec::new();
218        for layer in Layer::all() {
219            out.extend(self.walk_layer(layer)?);
220        }
221        out.sort();
222        Ok(out)
223    }
224
225    /// **SWEEP.** Like [`Store::walk`] but scoped to a single layer.
226    pub fn walk_layer(&self, layer: Layer) -> Result<Vec<PathBuf>, StoreError> {
227        let layer_root = self.root.join(layer.dir_name());
228        if !layer_root.is_dir() {
229            return Ok(Vec::new());
230        }
231        self.walk_content_md(&layer_root)
232    }
233
234    /// Enumerate every `.md` file in a single type-folder, **recursing through
235    /// its date-shards** (`sources/emails/**/*.md`). The unit the index builder
236    /// and per-folder rebuild operate on. SWEEP-class (scoped to one folder).
237    pub fn walk_type_folder(&self, type_folder: &Path) -> Result<Vec<PathBuf>, StoreError> {
238        let abs = self.resolve_under_root(type_folder);
239        if !abs.is_dir() {
240            return Ok(Vec::new());
241        }
242        self.walk_content_md(&abs)
243    }
244
245    /// The ≤`n` most-recent files in a type-folder by frontmatter `updated`
246    /// (descending), ties broken by store-relative path (ascending) — a total
247    /// order, so write-through and rebuild never disagree on #500 vs #501.
248    ///
249    /// Reads `updated` across the folder's shards — a SWEEP cost absorbed into
250    /// `index rebuild`. The write-through path never calls this. The
251    /// cap-selection primitive for the 500-entry `index.md` browse view.
252    pub fn recent_in_type_folder(
253        &self,
254        type_folder: &Path,
255        n: usize,
256    ) -> Result<Vec<PathBuf>, StoreError> {
257        let files = self.walk_type_folder(type_folder)?;
258        // (updated, rel-path) for each file. Files missing/unparseable
259        // `updated` sort *after* dated ones (None last), then by path — so they
260        // are deterministically the lowest-priority candidates for the cap, not
261        // dropped silently. The total order (updated desc, path asc) is what
262        // keeps write-through and rebuild agreeing on #500 vs #501.
263        let mut keyed: Vec<(Option<DateTime<FixedOffset>>, PathBuf)> = files
264            .into_iter()
265            .map(|rel| {
266                let updated = self.read_updated(&self.abs_path(&rel));
267                (updated, rel)
268            })
269            .collect();
270        keyed.sort_by(|a, b| {
271            // `updated` descending: newest first. `None` is treated as the
272            // oldest possible, so dated files always win a cap slot over
273            // undated ones.
274            let by_updated = b.0.cmp(&a.0);
275            by_updated.then_with(|| a.1.cmp(&b.1))
276        });
277        keyed.truncate(n);
278        Ok(keyed.into_iter().map(|(_, rel)| rel).collect())
279    }
280
281    /// The shard/flat predicate: true if the type date-shards, false if it
282    /// stays flat. True for source types and event record types
283    /// (`expense`/`invoice`/`meeting` + custom `order`/`ticket`/`transaction`),
284    /// or when `DB.md ## Schemas` declares `shard: by-date`. False for
285    /// dedup-bounded entity types (`contact`/`company`/`decision`) and `wiki/`.
286    pub fn type_shards(&self, type_: &str) -> bool {
287        // Built-in classification. Sharding is a property of the *type*:
288        //  - source types carry a primary date field and shard;
289        //  - event record types track business volume and shard;
290        //  - dedup-bounded entity types and curation-bounded wiki stay flat.
291        // NOTE: the SPEC's `DB.md ## Schemas` `shard: by-date` override has no
292        // representation in the frozen `Schema`/`FieldSpec` types (no shard
293        // flag), so it cannot be consulted here yet — see the store findings.
294        matches!(
295            type_,
296            // source types
297            "email" | "transcript" | "pdf-source"
298            // event record types (canonical)
299            | "expense" | "invoice" | "meeting"
300            // event record types (recognized custom, per the plan)
301            | "order" | "ticket" | "transaction"
302        )
303    }
304
305    /// Compute the canonical write path for a new file. For a sharding type
306    /// (per [`Store::type_shards`]) insert `<YYYY>/<MM>/` from the type's
307    /// primary date field (`email.date`, `expense.date`, … fallback `created`)
308    /// under the type folder; flat types and `wiki/` get no shard segment.
309    /// Deterministic + stable: same input → same path, so a record never moves
310    /// once written.
311    pub fn shard_path_for(
312        &self,
313        type_: &str,
314        frontmatter: &Frontmatter,
315        name: &str,
316    ) -> Result<PathBuf, StoreError> {
317        self.shard_path_in(&default_type_folder(type_), type_, frontmatter, name)
318    }
319
320    /// Like [`Store::shard_path_for`], but compute the path under an explicit,
321    /// caller-resolved type-folder rather than the canonical default. This lets a
322    /// write surface honour an agent-supplied conforming sub-folder — e.g.
323    /// `wiki/projects/`, `wiki/people/`, `wiki/synthesis/` (the SPEC files a
324    /// `wiki-page` under `wiki/<topic>/`, i.e. ANY topic sub-folder, not only the
325    /// `wiki/topics` default) — while still applying date-sharding for sharding
326    /// types. The folder must be a conforming `<layer>/<type-folder>` (2
327    /// components, recognized layer); the caller is responsible for that (see the
328    /// CLI's `resolve_write_path`), so it is taken as given here.
329    ///
330    /// Sharding is still a property of the *type*: a sharding type gets the
331    /// `<YYYY>/<MM>` segment under `folder`; a flat type lands directly in it.
332    pub fn shard_path_in(
333        &self,
334        folder: &Path,
335        type_: &str,
336        frontmatter: &Frontmatter,
337        name: &str,
338    ) -> Result<PathBuf, StoreError> {
339        let folder = folder.to_path_buf();
340        let filename = ensure_md_extension(name);
341
342        if !self.type_shards(type_) {
343            // Flat type (entity records, wiki, decisions): no shard segment.
344            return Ok(folder.join(filename));
345        }
346
347        // Sharding type: derive <YYYY>/<MM> from the primary date field, with
348        // `created` as the universal fallback. Reading the public `Frontmatter`
349        // fields directly (typed `created`/`updated` + raw `extra`) avoids the
350        // not-yet-implemented `Frontmatter::get`/`parse` and keeps this pure.
351        let (year, month) = self
352            .primary_shard_segment(type_, frontmatter)
353            .ok_or_else(|| StoreError::NoShardDate {
354                file: folder.join(&filename),
355            })?;
356
357        Ok(folder.join(year).join(month).join(filename))
358    }
359
360    /// Find files with an incoming wiki-link to `target`, via **embedded
361    /// ripgrep** for `[[target]]` across all layers. Loop-fast; no whole-graph
362    /// build. Returns store-relative paths.
363    pub fn find_links_to(&self, target: &Path) -> Result<Vec<PathBuf>, StoreError> {
364        // A single target is just the degenerate batch case — one alternation
365        // arm, one store scan. Routing through `find_links_to_any` keeps the
366        // pattern construction and the scan loop in exactly one place. The
367        // batch API takes `&[PathBuf]`, so the one-element slice is owned (a
368        // single alloc on this single-target convenience path; the batch path
369        // validate.rs rides is untouched).
370        self.find_links_to_any(&[target.to_path_buf()])
371    }
372
373    /// Find every file with an incoming wiki-link to **any** of `targets`, in a
374    /// **single embedded-ripgrep pass** over the store (one `.md` walk, one
375    /// presence-only scan per file). This is the batch incoming-linker finder the
376    /// working-set [`crate::validate::validate_working_set`] sits on: it must find
377    /// the linkers for the *whole* changed set without paying a full store read
378    /// per changed object. Cost is therefore one store scan (O(store)), NOT
379    /// `targets.len() × store` — calling [`find_links_to`](Self::find_links_to)
380    /// in a loop would reread every `.md` once per target and is the exact
381    /// `O(changed × store)` blow-up this method exists to prevent. Returns
382    /// store-relative paths (deduped, sorted).
383    ///
384    /// Why content scan and not the sidecar `links` field: the sidecar projects
385    /// only the frontmatter `links:` array, so it misses edges written in the
386    /// body or in typed fields (`company: [[…]]`). Finding an incoming link to an
387    /// arbitrary path therefore requires reading file content — the same reason
388    /// the single-target finder uses ripgrep.
389    pub fn find_links_to_any(&self, targets: &[PathBuf]) -> Result<Vec<PathBuf>, StoreError> {
390        // The wiki-link doctrine: a link is the full store-relative path, no
391        // `.md` extension. A reference to a target therefore appears literally
392        // as `[[<target>]]`, optionally with a `|display` suffix and (warned
393        // but accepted) a trailing `.md`. Build ONE regex that matches all
394        // accepted spellings of an incoming link to ANY target, escaping each
395        // target so path separators / dots stay literal and the alternation
396        // arms keep their boundaries (a link to `sarah` never matches
397        // `sarah-chen`).
398        let mut arms: Vec<String> = Vec::new();
399        for target in targets {
400            let target_str = path_to_link_str(target);
401            if target_str.is_empty() {
402                continue;
403            }
404            // [[ <target> (.md)? ( | display )? ]]
405            arms.push(format!(
406                r"\[\[{}(\.md)?(\|[^\]]*)?\]\]",
407                regex::escape(&target_str)
408            ));
409        }
410        // No usable targets → no possible incoming links, and an empty pattern
411        // would compile to a match-everything regex. Short-circuit instead.
412        if arms.is_empty() {
413            return Ok(Vec::new());
414        }
415        let pattern = arms.join("|");
416
417        let matcher = RegexMatcher::new(&pattern).map_err(|e| StoreError::Search {
418            root: self.root.clone(),
419            message: format!("invalid backlink pattern: {e}"),
420        })?;
421
422        let mut hits = std::collections::BTreeSet::new();
423        // Scan every `.md` file in the store (skip hidden + `log/`), including
424        // `index.md` catalogs — an incoming reference is wherever the literal
425        // link text lives; the caller decides relevance. ONE walk for the whole
426        // target set; per file we stop at the first hit (presence is all we
427        // need), so a file that links to several targets is read once, not once
428        // per target.
429        for rel in self.walk_all_md()? {
430            let abs = self.abs_path(&rel);
431            let mut matched_here = false;
432            let mut searcher = Searcher::new();
433            let res = searcher.search_path(
434                &matcher,
435                &abs,
436                UTF8(|_lnum, _line| {
437                    matched_here = true;
438                    // Stop at the first hit: presence is all we need.
439                    Ok(false)
440                }),
441            );
442            if let Err(e) = res {
443                return Err(StoreError::Search {
444                    root: self.root.clone(),
445                    message: format!("search failed in {}: {e}", abs.display()),
446                });
447            }
448            if matched_here {
449                hits.insert(rel);
450            }
451        }
452        Ok(hits.into_iter().collect())
453    }
454
455    /// Candidate set for a `type` query: read the relevant type-folder
456    /// `index.jsonl` sidecar(s) and return their records. Complete and
457    /// cold-cache-proof — NOT a walk-and-parse or a frontmatter ripgrep scan,
458    /// and **never a store-wide read**. The common path is one sequential read
459    /// of the canonical type-folder sidecar (O(entities)); when that sidecar is
460    /// absent the read is bounded to the type's single layer subtree
461    /// (O(entities-in-layer)), so a `--type proposal` query before that folder
462    /// has been indexed still stays inside the interactive loop's O(entities)
463    /// contract instead of fanning out across every sidecar in the store.
464    pub fn find_by_type(&self, type_: &str) -> Result<Vec<IndexRecord>, StoreError> {
465        // Read the type's canonical-folder sidecar when it exists (the common,
466        // O(entities) path). Otherwise fall back to the sidecars of the *one
467        // layer* the type belongs to and filter by `type` — complete for records
468        // filed under a non-canonical folder name within that layer (e.g. a
469        // custom `proposal` filed in `records/proposals/` when the canonical
470        // guess is the bare `records/proposal/`), without the whole-store
471        // sidecar fan-out that would break the interactive loop's O(entities)
472        // contract. A type lives in exactly one layer, and `default_type_folder`
473        // always encodes it (recognized → its SPEC layer; unrecognized →
474        // `records/`), so the fallback walk is bounded to that layer's subtree —
475        // O(entities-in-layer), never O(store). Either way: sequential, complete
476        // sidecar reads, never a walk-and-parse of the tree.
477        let canonical_folder = default_type_folder(type_);
478        let canonical = self.root.join(&canonical_folder).join(TYPE_INDEX_FILE);
479        let records = if canonical.is_file() {
480            self.read_type_index(&canonical)?
481        } else {
482            self.read_all_type_indexes_in(layer_of_folder(&canonical_folder))?
483        };
484        Ok(records.into_iter().filter(|r| r.type_ == type_).collect())
485    }
486
487    /// Candidate set for a `key=value` frontmatter query, **store-wide**: read
488    /// every type-folder `index.jsonl` sidecar and filter their records. The
489    /// unscoped pre-write dedup primitive; prefer [`Store::find_by_where_in`]
490    /// with a layer scope to stay O(entities-in-layer) on the interactive loop.
491    pub fn find_by_where(&self, key: &str, value: &str) -> Result<Vec<IndexRecord>, StoreError> {
492        self.find_by_where_in(key, value, None)
493    }
494
495    /// Candidate set for a `key=value` frontmatter query, **scoped to one
496    /// layer** when `layer` is `Some`: the sidecar walk is confined to that
497    /// layer's subtree (`<root>/<layer>/`), so the I/O is O(entities-in-layer),
498    /// not O(store records). `None` keeps the store-wide read.
499    ///
500    /// This is what makes `--in <layer>` an I/O scope, not just a result
501    /// filter: a `--where`-only query (no `--type`) used to read every sidecar
502    /// in the store and narrow by layer in memory, breaking the O(entities)
503    /// contract the interactive loop depends on. With a layer in hand we walk
504    /// only that layer's sidecars.
505    pub fn find_by_where_in(
506        &self,
507        key: &str,
508        value: &str,
509        layer: Option<Layer>,
510    ) -> Result<Vec<IndexRecord>, StoreError> {
511        // A `key=value` query can target any frontmatter field across any type,
512        // so within the chosen subtree we still read every type-folder sidecar
513        // and filter. The layer (when given) bounds *which* subtree, turning a
514        // whole-store walk into a single-layer walk.
515        let records = self.read_all_type_indexes_in(layer)?;
516        Ok(records
517            .into_iter()
518            .filter(|r| record_matches_field(r, key, value))
519            .collect())
520    }
521
522    /// Every record across the type-folder `index.jsonl` sidecars, scoped to one
523    /// layer when `layer` is `Some` (the walk is confined to `<root>/<layer>/`)
524    /// else store-wide. Sequential, complete sidecar reads — never a
525    /// walk-and-parse of the content tree.
526    ///
527    /// This is the unfiltered sidecar-enumeration primitive the relationship
528    /// loop sits on: [`crate::graph::backlinks_filtered`] uses it to bound its
529    /// candidate set to the relevant layer (or the whole store) without opening
530    /// the content tree, then confirms each candidate's edge by parsing the file.
531    pub fn sidecar_records(&self, layer: Option<Layer>) -> Result<Vec<IndexRecord>, StoreError> {
532        self.read_all_type_indexes_in(layer)
533    }
534
535    /// Parse a type-folder's `index.jsonl` into [`IndexRecord`]s, applying
536    /// last-write-wins by `path` over any un-compacted lines. The sidecar-read
537    /// primitive every structured query sits on.
538    pub fn read_type_index(&self, index_jsonl: &Path) -> Result<Vec<IndexRecord>, StoreError> {
539        let text = std::fs::read_to_string(index_jsonl).map_err(|e| StoreError::BadTypeIndex {
540            path: index_jsonl.to_path_buf(),
541            message: e.to_string(),
542        })?;
543
544        // Last-write-wins by `path` over un-compacted lines: a later line for
545        // the same path supersedes an earlier one (the jsonl is append-mostly
546        // and only compacted on rebuild). Blank lines are skipped; a non-blank
547        // line that is not a valid IndexRecord is a hard parse error.
548        let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
549        for (i, line) in text.lines().enumerate() {
550            let trimmed = line.trim();
551            if trimmed.is_empty() {
552                continue;
553            }
554            let record: IndexRecord =
555                serde_json::from_str(trimmed).map_err(|e| StoreError::BadTypeIndex {
556                    path: index_jsonl.to_path_buf(),
557                    message: format!("line {}: {e}", i + 1),
558                })?;
559            by_path.insert(record.path.clone(), record);
560        }
561        // BTreeMap keyed by path → records emerge sorted by path ascending,
562        // a deterministic order independent of line order in the file.
563        Ok(by_path.into_values().collect())
564    }
565
566    /// Resolve a store-relative path to its absolute on-disk path under
567    /// [`root`](Store::root).
568    pub fn abs_path(&self, store_relative: &Path) -> PathBuf {
569        // `Path::join` returns `store_relative` unchanged if it is already
570        // absolute, so passing an absolute path through is a no-op.
571        self.root.join(store_relative)
572    }
573
574    /// Convert an absolute path under the store into its store-relative form.
575    pub fn rel_path(&self, abs: &Path) -> Option<PathBuf> {
576        abs.strip_prefix(&self.root).ok().map(|p| p.to_path_buf())
577    }
578
579    // ── Private helpers ─────────────────────────────────────────────────────
580
581    /// Resolve a caller-supplied folder path (store-relative or absolute) to an
582    /// absolute path under the store root.
583    fn resolve_under_root(&self, folder: &Path) -> PathBuf {
584        if folder.is_absolute() {
585            folder.to_path_buf()
586        } else {
587            self.root.join(folder)
588        }
589    }
590
591    /// Walk a subtree for content `.md` files (skip hidden dirs, skip `index.md`
592    /// / `DB.md` / `log.md`), returning store-relative paths. Used by the layer
593    /// and type-folder walks.
594    fn walk_content_md(&self, root: &Path) -> Result<Vec<PathBuf>, StoreError> {
595        let mut out = Vec::new();
596        for entry in self.md_walker(root).build() {
597            let entry = entry.map_err(|e| StoreError::Search {
598                root: root.to_path_buf(),
599                message: e.to_string(),
600            })?;
601            if !is_file_entry(&entry) {
602                continue;
603            }
604            let path = entry.path();
605            if !has_md_extension(path) {
606                continue;
607            }
608            if is_non_content_basename(path) {
609                continue;
610            }
611            if let Some(rel) = self.rel_path(path) {
612                out.push(rel);
613            }
614        }
615        out.sort();
616        Ok(out)
617    }
618
619    /// Walk the whole store for **every** `.md` file (including `index.md`),
620    /// skipping hidden dirs and the `log/` archive tree. Used by the backlink
621    /// scan, where the literal link text can live in any markdown file.
622    fn walk_all_md(&self) -> Result<Vec<PathBuf>, StoreError> {
623        let mut out = Vec::new();
624        for entry in self.md_walker(&self.root).build() {
625            let entry = entry.map_err(|e| StoreError::Search {
626                root: self.root.clone(),
627                message: e.to_string(),
628            })?;
629            if !is_file_entry(&entry) {
630                continue;
631            }
632            let path = entry.path();
633            if !has_md_extension(path) {
634                continue;
635            }
636            if self.is_in_log_dir(path) {
637                continue;
638            }
639            if let Some(rel) = self.rel_path(path) {
640                out.push(rel);
641            }
642        }
643        out.sort();
644        Ok(out)
645    }
646
647    /// Read and merge every type-folder `index.jsonl` sidecar under `layer`
648    /// when given, else the whole store (skip hidden + `log/`). Each sidecar is
649    /// read with last-write-wins by path; across sidecars, paths are disjoint by
650    /// construction (one sidecar per folder), so a plain concatenation preserves
651    /// completeness. A layer scope confines the walk to `<root>/<layer>/`, which
652    /// is what keeps `find_by_where_in` O(entities-in-layer).
653    fn read_all_type_indexes_in(
654        &self,
655        layer: Option<Layer>,
656    ) -> Result<Vec<IndexRecord>, StoreError> {
657        let mut out = Vec::new();
658        for sidecar in self.find_type_index_files_in(layer)? {
659            out.extend(self.read_type_index(&self.abs_path(&sidecar))?);
660        }
661        Ok(out)
662    }
663
664    /// Locate every `index.jsonl` sidecar under `layer` (when given) else the
665    /// whole store (skip hidden + `log/`), returning store-relative paths. The
666    /// walk root is `<root>/<layer>/` for a scoped read and `self.root` for the
667    /// store-wide read; a non-existent layer subtree yields no sidecars rather
668    /// than walking a missing path.
669    fn find_type_index_files_in(&self, layer: Option<Layer>) -> Result<Vec<PathBuf>, StoreError> {
670        let walk_root = match layer {
671            Some(l) => self.root.join(l.dir_name()),
672            None => self.root.clone(),
673        };
674        // A scoped walk over a layer folder that does not exist yet must be an
675        // empty result, mirroring `walk_layer`'s missing-dir guard — not a walk
676        // error from `ignore` over a nonexistent path.
677        if !walk_root.is_dir() {
678            return Ok(Vec::new());
679        }
680        let mut out = Vec::new();
681        let mut builder = WalkBuilder::new(&walk_root);
682        builder.standard_filters(false).hidden(true);
683        for entry in builder.build() {
684            let entry = entry.map_err(|e| StoreError::Search {
685                root: walk_root.clone(),
686                message: e.to_string(),
687            })?;
688            if !is_file_entry(&entry) {
689                continue;
690            }
691            let path = entry.path();
692            if path.file_name().and_then(|n| n.to_str()) != Some(TYPE_INDEX_FILE) {
693                continue;
694            }
695            if self.is_in_log_dir(path) {
696                continue;
697            }
698            if let Some(rel) = self.rel_path(path) {
699                out.push(rel);
700            }
701        }
702        out.sort();
703        Ok(out)
704    }
705
706    /// A `WalkBuilder` configured for db.md SWEEPs: gitignore/global-ignore are
707    /// OFF (a SWEEP must see every file even if the store is a git repo with a
708    /// `.gitignore`), but hidden files/dirs are skipped.
709    fn md_walker(&self, root: &Path) -> WalkBuilder {
710        let mut builder = WalkBuilder::new(root);
711        builder.standard_filters(false).hidden(true);
712        builder
713    }
714
715    /// True if an absolute path lives under the store's root-level `log/`
716    /// rotation-archive directory.
717    fn is_in_log_dir(&self, abs: &Path) -> bool {
718        match self.rel_path(abs) {
719            Some(rel) => rel.components().next().map(|c| c.as_os_str()) == Some("log".as_ref()),
720            None => false,
721        }
722    }
723
724    /// Read a file's frontmatter `updated` field as an RFC3339 timestamp,
725    /// returning `None` when absent/unparseable. A self-contained reader (does
726    /// not depend on the not-yet-implemented `parser::read_file`); parses the
727    /// leading `---`-fenced YAML block with the same engine the parser uses.
728    fn read_updated(&self, abs: &Path) -> Option<DateTime<FixedOffset>> {
729        let text = std::fs::read_to_string(abs).ok()?;
730        let yaml = frontmatter_block(&text)?;
731        let value: serde_norway::Value = serde_norway::from_str(yaml).ok()?;
732        let raw = value.get("updated")?;
733        value_to_datetime(raw)
734    }
735
736    /// The `<YYYY>/<MM>` shard segment for a sharding type, from its primary
737    /// date field with a `created` fallback. Reads the public `Frontmatter`
738    /// fields directly. `None` when no usable date is present.
739    fn primary_shard_segment(&self, type_: &str, fm: &Frontmatter) -> Option<(String, String)> {
740        // Try the type's primary date field first.
741        if let Some(field) = primary_date_field(type_) {
742            if let Some(v) = fm.extra.get(field) {
743                if let Some(seg) = value_to_year_month(v) {
744                    return Some(seg);
745                }
746            }
747        }
748        // Universal fallback: the typed `created` timestamp.
749        fm.created
750            .map(|dt| (format!("{:04}", dt.year()), format!("{:02}", dt.month())))
751    }
752}
753
754// ── Free helpers (no `self`) ────────────────────────────────────────────────
755
756/// True if a walk entry is a regular file (not a dir / symlink-to-dir).
757fn is_file_entry(entry: &ignore::DirEntry) -> bool {
758    entry.file_type().map(|ft| ft.is_file()).unwrap_or(false)
759}
760
/// Whether the path's extension is exactly `md`. The comparison is
/// case-sensitive (db.md files are lowercase `.md`), so `.MD` does not count.
fn has_md_extension(path: &Path) -> bool {
    matches!(path.extension().and_then(|ext| ext.to_str()), Some("md"))
}
766
767/// True if the basename is a non-content meta file (`DB.md`, `index.md`,
768/// `log.md`) that the content walks must skip.
769fn is_non_content_basename(path: &Path) -> bool {
770    match path.file_name().and_then(|n| n.to_str()) {
771        Some(name) => NON_CONTENT_BASENAMES.contains(&name),
772        None => false,
773    }
774}
775
/// Return `name` guaranteed to end in `.md`: a bare name gets the suffix
/// appended, a name already ending in `.md` is returned unchanged.
fn ensure_md_extension(name: &str) -> String {
    let mut file_name = String::with_capacity(name.len() + ".md".len());
    file_name.push_str(name);
    if !name.ends_with(".md") {
        file_name.push_str(".md");
    }
    file_name
}
784
/// Render a store-relative path as a wiki-link target string.
///
/// Only normal path components are kept (so a leading `./` disappears), they
/// are joined with `/` (never `\`), and one trailing `.md` is removed.
/// Components that are not valid UTF-8 are left out.
fn path_to_link_str(target: &Path) -> String {
    let segments: Vec<&str> = target
        .components()
        .filter_map(|component| match component {
            std::path::Component::Normal(name) => name.to_str(),
            _ => None,
        })
        .collect();
    let link = segments.join("/");
    match link.strip_suffix(".md") {
        Some(stem) => stem.to_owned(),
        None => link,
    }
}
802
/// The canonical default folder for a type, per the SPEC type table
/// (`email → sources/emails`, `expense → records/expenses`, …).
///
/// A type missing from the table falls back to `records/<type>`, using the
/// bare type name with no pluralization guess.
fn default_type_folder(type_: &str) -> PathBuf {
    const CANONICAL_FOLDERS: [(&str, &str); 10] = [
        // sources
        ("email", "sources/emails"),
        ("transcript", "sources/transcripts"),
        ("pdf-source", "sources/docs"),
        // records — entities
        ("contact", "records/contacts"),
        ("company", "records/companies"),
        // records — events
        ("expense", "records/expenses"),
        ("meeting", "records/meetings"),
        ("decision", "records/decisions"),
        ("invoice", "records/invoices"),
        // wiki — the SPEC type table files a wiki-page under `wiki/<topic>/`,
        // i.e. ALWAYS a sub-folder, never flat under `wiki/`. A 2-component
        // `wiki/<file>` path is non-conforming: `index::type_folder_of` /
        // `validate::type_folder_of` require `<layer>/<type-folder>/<file>` (3
        // components), so a flat wiki page either crashes write-through
        // (`on_write` tries to create `index.md` *inside* a file) or is silently
        // dropped from every catalog by `rebuild_all`. `topic` is the page's
        // canonical bucket; with only the bare type in hand here, `wiki/topics`
        // is the deterministic default folder (matches the dogfood store).
        ("wiki-page", "wiki/topics"),
    ];

    CANONICAL_FOLDERS
        .iter()
        .find(|&&(known, _)| known == type_)
        .map(|&(_, folder)| PathBuf::from(folder))
        // unrecognized: bare type name under records/
        .unwrap_or_else(|| PathBuf::from("records").join(type_))
}
836
837/// The canonical [`Layer`] a `type_` belongs to, derived from its default
838/// type-folder (`email` → `Sources`, `contact` → `Records`, `wiki-page` →
839/// `Wiki`, unrecognized → `Records`). The write path uses this to decide whether
840/// an agent-supplied folder is in the *right* layer for the type before honouring
841/// its sub-folder choice.
842pub fn layer_for_type(type_: &str) -> Layer {
843    layer_of_folder(&default_type_folder(type_)).unwrap_or(Layer::Records)
844}
845
846/// The [`Layer`] a type-folder path lives in, read from its first component
847/// (`sources/` → `Sources`, `records/` → `Records`, `wiki/` → `Wiki`). Used to
848/// bound [`Store::find_by_type`]'s canonical-folder-absent fallback to a single
849/// layer subtree. Returns `None` for a path with no recognized layer prefix;
850/// every value [`default_type_folder`] produces has one, so in practice this is
851/// always `Some` on the call path — `None` degrades to a store-wide read.
852fn layer_of_folder(folder: &Path) -> Option<Layer> {
853    let first = folder.components().next()?.as_os_str().to_str()?;
854    Layer::from_dir_name(first)
855}
856
/// Infer a content file's canonical `type` from its store-relative path — the
/// inverse of [`default_type_folder`] and the single source of truth for
/// path→type inference (the CLI's `fm init` calls this, never re-derives it).
///
/// Requires the canonical `<layer>/<type-folder>/<file>` shape: the first
/// component must be a known layer, the second is the type folder, and at
/// least one more component must follow. A shorter path (a file directly under
/// a layer) or an unknown leading layer yields `None`.
///
/// Components are matched strictly by position. A layer or folder component
/// that is not valid UTF-8 yields `None`; it is never skipped, because
/// skipping would shift later components into the layer/folder slots and
/// infer a type from the wrong directory.
///
/// Recognized `(layer, folder)` pairs map back to their canonical type. For an
/// unrecognized folder the fallback is the **bare folder name verbatim** (no
/// pluralization/singularization) so it round-trips with `default_type_folder`,
/// whose unrecognized fallback is the bare type name (`task` ⇄ `records/task`).
/// Singularizing here would break that round-trip (`records/tasks` → `task`
/// while `default_type_folder("task")` → `records/task`). `wiki/<topic>` always
/// infers `wiki-page`, since every wiki page is filed under a topic folder.
pub fn infer_type_from_path(rel: &Path) -> Option<String> {
    // `map`, not `filter_map`: each item keeps its true position, with `None`
    // marking a component that is not valid UTF-8.
    let mut comps = rel.components().map(|c| c.as_os_str().to_str());
    let layer = comps.next()??;
    if !matches!(layer, "sources" | "records" | "wiki") {
        return None;
    }
    let folder = comps.next()??;
    // The file itself must be a third component (a real type-folder, not the
    // file sitting directly under the layer). Only its existence matters; its
    // text is never read.
    comps.next()?;

    let mapped = match (layer, folder) {
        ("sources", "emails") => "email",
        ("sources", "transcripts") => "transcript",
        ("sources", "docs") => "pdf-source",
        ("records", "contacts") => "contact",
        ("records", "companies") => "company",
        ("records", "expenses") => "expense",
        ("records", "meetings") => "meeting",
        ("records", "decisions") => "decision",
        ("records", "invoices") => "invoice",
        // Every wiki page is filed under `wiki/<topic>/`; the type is always
        // `wiki-page` regardless of the topic-folder name.
        ("wiki", _) => "wiki-page",
        // Unrecognized folder: the bare name, verbatim. This is the inverse of
        // `default_type_folder`'s unrecognized fallback (`other → records/other`)
        // and the round-trip would break if we pluralized/singularized here.
        (_, other) => other,
    };
    Some(mapped.to_string())
}
903
/// Name of the frontmatter field whose value drives the `<YYYY>/<MM>` shard
/// for `type_`. `None` means the type has no dedicated date field and only the
/// `created` fallback applies.
fn primary_date_field(type_: &str) -> Option<&'static str> {
    let field = match type_ {
        "email" | "expense" | "invoice" | "meeting" => "date",
        "transcript" => "recorded_at",
        "pdf-source" => "received_at",
        // Any other type has no canonical date field name and relies on
        // `created`.
        _ => return None,
    };
    Some(field)
}
917
918/// Parse a YAML value into an RFC3339 [`DateTime`], accepting both an explicit
919/// string and a YAML-native scalar rendered to string.
920fn value_to_datetime(value: &serde_norway::Value) -> Option<DateTime<FixedOffset>> {
921    let s = yaml_scalar_string(value)?;
922    DateTime::parse_from_rfc3339(s.trim()).ok()
923}
924
925/// Extract `(YYYY, MM)` from a YAML date/timestamp value. Lenient: matches a
926/// leading `YYYY-MM` so a bare `2026-05-22` date and a full
927/// `2026-05-22T10:00:00-07:00` timestamp both work.
928fn value_to_year_month(value: &serde_norway::Value) -> Option<(String, String)> {
929    let s = yaml_scalar_string(value)?;
930    year_month_from_str(s.trim())
931}
932
/// `(YYYY, MM)` from the leading `YYYY-MM` of a date string.
///
/// Requires exactly: four ASCII digits, `-`, two ASCII digits forming a month
/// in `01..=12`. Whatever follows the seventh byte is ignored.
fn year_month_from_str(s: &str) -> Option<(String, String)> {
    // Hand-rolled byte inspection instead of a regex, so the write path never
    // pays for a regex compile.
    let head = s.as_bytes().get(..7)?;
    let (year, rest) = head.split_at(4);
    let (separator, month) = (rest[0], &rest[1..]);
    let all_digits = |digits: &[u8]| digits.iter().all(u8::is_ascii_digit);
    if separator != b'-' || !all_digits(year) || !all_digits(month) {
        return None;
    }
    let month_number = (month[0] - b'0') * 10 + (month[1] - b'0');
    if !(1..=12).contains(&month_number) {
        return None;
    }
    // The first seven bytes are proven ASCII, so these slices fall on
    // character boundaries.
    Some((s[..4].to_string(), s[5..7].to_string()))
}
958
959/// Render a YAML scalar as a string: a real `String` verbatim, otherwise the
960/// value's compact YAML serialization (covers timestamps that the YAML engine
961/// may surface as a non-string scalar).
962fn yaml_scalar_string(value: &serde_norway::Value) -> Option<String> {
963    if let Some(s) = value.as_str() {
964        return Some(s.to_string());
965    }
966    match value {
967        serde_norway::Value::Null => None,
968        serde_norway::Value::Mapping(_) | serde_norway::Value::Sequence(_) => None,
969        other => serde_norway::to_string(other)
970            .ok()
971            .map(|s| s.trim().to_string()),
972    }
973}
974
975/// The YAML frontmatter block of a file: the text between a leading `---` fence
976/// and the next `---` fence, exclusive. `None` if the file does not open with a
977/// `---` fence on its first line.
978fn frontmatter_block(text: &str) -> Option<&str> {
979    // Tolerate a UTF-8 BOM and CRLF, but the fence must be the very first line.
980    let body = text.strip_prefix('\u{feff}').unwrap_or(text);
981    let mut rest = body;
982    // First line must be exactly `---` (allowing trailing CR).
983    let (first, after_first) = split_first_line(rest);
984    if first.trim_end_matches('\r') != "---" {
985        return None;
986    }
987    rest = after_first;
988    let block_start = rest;
989    let mut scanned = 0usize;
990    loop {
991        let (line, after) = split_first_line(rest);
992        if line.trim_end_matches('\r') == "---" {
993            return Some(&block_start[..scanned]);
994        }
995        if after.is_empty() && line.is_empty() {
996            // Reached end of input without a closing fence.
997            return None;
998        }
999        scanned += line.len() + 1; // +1 for the consumed '\n'
1000        if after.is_empty() {
1001            return None;
1002        }
1003        rest = after;
1004    }
1005}
1006
/// Split at the first `\n` into (the line before it, everything after it);
/// the newline itself belongs to neither half. Without any newline the whole
/// input is the line and the remainder is empty.
fn split_first_line(s: &str) -> (&str, &str) {
    s.split_once('\n').unwrap_or((s, ""))
}
1016
1017/// True if an [`IndexRecord`] has a field `key` equal to `value`, checking the
1018/// typed columns first and then the flattened `fields` map.
1019fn record_matches_field(record: &IndexRecord, key: &str, value: &str) -> bool {
1020    match key {
1021        "type" => record.type_ == value,
1022        "summary" => record.summary == value,
1023        "path" => record.path.to_string_lossy() == value,
1024        "created" => timestamp_matches(record.created, value),
1025        "updated" => timestamp_matches(record.updated, value),
1026        "tags" => record.tags.iter().any(|t| t == value),
1027        "links" => record.links.iter().any(|l| l == value),
1028        other => record
1029            .fields
1030            .get(other)
1031            .map(|v| json_value_matches(v, value))
1032            .unwrap_or(false),
1033    }
1034}
1035
1036/// Compare a record's `created`/`updated` instant against a query `value`.
1037///
1038/// db.md files write timestamps in several equivalent RFC3339 spellings — most
1039/// commonly the `Z` UTC designator (`2026-05-01T00:00:00Z`) but also an explicit
1040/// offset (`...+00:00`, `...-07:00`). A naive `record.created.to_rfc3339() ==
1041/// value` reformats only one side: chrono renders a UTC instant as `+00:00`, so
1042/// the `Z` form an agent reads straight out of the file would never match. We
1043/// instead parse `value` as RFC3339 and compare instants, where `Z` and `+00:00`
1044/// (and any same-instant offset) are equal. A `value` that is not valid RFC3339
1045/// can never equal a real timestamp, so it falls through to `false`.
1046fn timestamp_matches(stored: Option<DateTime<FixedOffset>>, value: &str) -> bool {
1047    match (stored, DateTime::parse_from_rfc3339(value)) {
1048        (Some(stored), Ok(queried)) => stored == queried,
1049        _ => false,
1050    }
1051}
1052
1053/// Compare a JSON field value against a query string. A string matches
1054/// verbatim; scalars match their textual form; an array matches if any element
1055/// matches (so a list-valued frontmatter field is membership-queried).
1056fn json_value_matches(v: &serde_json::Value, value: &str) -> bool {
1057    match v {
1058        serde_json::Value::String(s) => s == value,
1059        serde_json::Value::Bool(b) => b.to_string() == value,
1060        serde_json::Value::Number(n) => n.to_string() == value,
1061        serde_json::Value::Array(items) => items.iter().any(|i| json_value_matches(i, value)),
1062        // A present-but-null field never matches — consistent with the in-memory
1063        // post-filter (`query::json_value_matches`, which the first `where`
1064        // clause is NOT re-checked against, so the two must agree here or a
1065        // `--where field=` query would return different rows than `--type X
1066        // --where field=`).
1067        serde_json::Value::Null => false,
1068        serde_json::Value::Object(_) => false,
1069    }
1070}
1071
1072#[cfg(test)]
1073mod tests {
1074    use super::*;
1075    use std::fs;
1076    use tempfile::{tempdir, TempDir};
1077
1078    // ── Fixtures ────────────────────────────────────────────────────────────
1079
1080    /// Write `contents` to `<root>/<rel>`, creating parent dirs. Returns the
1081    /// store-relative path for convenient assertions.
1082    fn write(root: &Path, rel: &str, contents: &str) -> PathBuf {
1083        let abs = root.join(rel);
1084        fs::create_dir_all(abs.parent().unwrap()).unwrap();
1085        fs::write(&abs, contents).unwrap();
1086        PathBuf::from(rel)
1087    }
1088
1089    /// A minimal content file with the given `updated` timestamp in frontmatter.
1090    fn content_md(updated: &str) -> String {
1091        format!(
1092            "---\ntype: note\ncreated: {updated}\nupdated: {updated}\nsummary: a note\n---\n\nbody\n"
1093        )
1094    }
1095
1096    /// A bare directory with a `DB.md` marker (valid `db-md` frontmatter so the
1097    /// real parser is exercised).
1098    fn empty_store() -> TempDir {
1099        let dir = tempdir().unwrap();
1100        fs::write(
1101            dir.path().join("DB.md"),
1102            "---\ntype: db-md\nscope: company\nowner: Test\n---\n\n# Store\n",
1103        )
1104        .unwrap();
1105        dir
1106    }
1107
1108    /// Open a store rooted at a TempDir; panics if `open` rejects it.
1109    fn open(dir: &TempDir) -> Store {
1110        Store::open(dir.path()).expect("fixture should be a valid store")
1111    }
1112
1113    fn rels(paths: &[PathBuf]) -> Vec<String> {
1114        paths
1115            .iter()
1116            .map(|p| p.to_string_lossy().replace('\\', "/"))
1117            .collect()
1118    }
1119
1120    // ── Layer ───────────────────────────────────────────────────────────────
1121
1122    #[test]
1123    fn layer_dir_name_and_parse_are_inverse() {
1124        for layer in Layer::all() {
1125            assert_eq!(Layer::from_dir_name(layer.dir_name()), Some(layer));
1126        }
1127        assert_eq!(Layer::Sources.dir_name(), "sources");
1128        assert_eq!(Layer::Records.dir_name(), "records");
1129        assert_eq!(Layer::Wiki.dir_name(), "wiki");
1130        assert_eq!(Layer::from_dir_name("log"), None);
1131        assert_eq!(Layer::from_dir_name("Sources"), None); // case-sensitive
1132    }
1133
1134    #[test]
1135    fn layer_order_is_canonical() {
1136        // stats keys a BTreeMap on Layer; the sort order must be sources<records<wiki.
1137        let mut v = [Layer::Wiki, Layer::Sources, Layer::Records];
1138        v.sort();
1139        assert_eq!(v, [Layer::Sources, Layer::Records, Layer::Wiki]);
1140    }
1141
1142    // ── is_db_md_store / open ────────────────────────────────────────────────
1143
1144    #[test]
1145    fn is_store_true_only_with_uppercase_marker() {
1146        let dir = tempdir().unwrap();
1147        assert!(
1148            !Store::is_db_md_store(dir.path()),
1149            "no marker → not a store"
1150        );
1151
1152        fs::write(dir.path().join("DB.md"), "---\ntype: db-md\n---\n").unwrap();
1153        assert!(Store::is_db_md_store(dir.path()), "uppercase DB.md → store");
1154    }
1155
1156    #[test]
1157    fn is_store_false_for_lowercase_db_md() {
1158        // The case-sensitivity contract: a lowercase db.md is the spec name, not
1159        // a marker — even on a case-insensitive filesystem where Path::exists
1160        // would lie. This test must pass on macOS (case-insensitive) too.
1161        let dir = tempdir().unwrap();
1162        fs::write(dir.path().join("db.md"), "---\ntype: db-md\n---\n").unwrap();
1163        assert!(
1164            !Store::is_db_md_store(dir.path()),
1165            "lowercase db.md must NOT be treated as a store marker"
1166        );
1167        assert!(Store::open(dir.path()).is_err());
1168    }
1169
1170    #[test]
1171    fn is_store_false_when_db_md_is_a_directory() {
1172        let dir = tempdir().unwrap();
1173        fs::create_dir(dir.path().join("DB.md")).unwrap();
1174        assert!(
1175            !Store::is_db_md_store(dir.path()),
1176            "a directory named DB.md is not the file marker"
1177        );
1178    }
1179
1180    #[test]
1181    fn open_rejects_non_store_with_path() {
1182        let dir = tempdir().unwrap();
1183        let err = Store::open(dir.path()).unwrap_err();
1184        assert_eq!(err.path, dir.path());
1185    }
1186
1187    #[test]
1188    fn open_succeeds_and_parses_config() {
1189        let dir = tempdir().unwrap();
1190        // A DB.md whose ## Policies declares a frozen page — proves open()
1191        // actually parsed the config rather than substituting a default.
1192        fs::write(
1193            dir.path().join("DB.md"),
1194            "---\ntype: db-md\nscope: company\nowner: Test\n---\n\n# Store\n\n\
1195             ## Policies\n\n### Frozen pages\n- records/decisions/q1.md\n",
1196        )
1197        .unwrap();
1198        let store = Store::open(dir.path()).unwrap();
1199        assert_eq!(store.root, dir.path());
1200        assert!(
1201            store
1202                .config
1203                .frozen_pages
1204                .iter()
1205                .any(|p| p == Path::new("records/decisions/q1.md")),
1206            "open() must surface DB.md ## Policies, got {:?}",
1207            store.config.frozen_pages
1208        );
1209    }
1210
1211    // ── walk / walk_layer / walk_type_folder ─────────────────────────────────
1212
1213    #[test]
1214    fn walk_collects_content_across_layers_skipping_meta_and_log() {
1215        let dir = empty_store();
1216        let root = dir.path();
1217        write(
1218            root,
1219            "sources/emails/2026/05/a.md",
1220            &content_md("2026-05-01T00:00:00Z"),
1221        );
1222        write(
1223            root,
1224            "records/contacts/sarah.md",
1225            &content_md("2026-05-02T00:00:00Z"),
1226        );
1227        write(
1228            root,
1229            "wiki/people/sarah.md",
1230            &content_md("2026-05-03T00:00:00Z"),
1231        );
1232        // Things walk() must SKIP:
1233        write(root, "sources/emails/index.md", "---\ntype: index\n---\n"); // catalog
1234        write(root, "index.md", "---\ntype: index\n---\n"); // root catalog
1235        write(root, "log.md", "---\ntype: log\n---\n"); // log
1236        write(root, "log/2026-04.md", "---\ntype: log\n---\n"); // rotated log archive
1237        write(
1238            root,
1239            "sources/.hidden/secret.md",
1240            &content_md("2026-05-09T00:00:00Z"),
1241        ); // hidden dir
1242        write(root, "records/contacts/notes.txt", "not markdown"); // non-md
1243
1244        let store = open(&dir);
1245        let got = rels(&store.walk().unwrap());
1246        assert_eq!(
1247            got,
1248            vec![
1249                "records/contacts/sarah.md".to_string(),
1250                "sources/emails/2026/05/a.md".to_string(),
1251                "wiki/people/sarah.md".to_string(),
1252            ]
1253        );
1254    }
1255
1256    #[test]
1257    fn walk_layer_is_scoped() {
1258        let dir = empty_store();
1259        let root = dir.path();
1260        write(
1261            root,
1262            "sources/emails/2026/05/a.md",
1263            &content_md("2026-05-01T00:00:00Z"),
1264        );
1265        write(
1266            root,
1267            "records/contacts/sarah.md",
1268            &content_md("2026-05-02T00:00:00Z"),
1269        );
1270        let store = open(&dir);
1271
1272        assert_eq!(
1273            rels(&store.walk_layer(Layer::Sources).unwrap()),
1274            vec!["sources/emails/2026/05/a.md".to_string()]
1275        );
1276        assert_eq!(
1277            rels(&store.walk_layer(Layer::Records).unwrap()),
1278            vec!["records/contacts/sarah.md".to_string()]
1279        );
1280        // A layer with no directory is empty, not an error.
1281        assert!(store.walk_layer(Layer::Wiki).unwrap().is_empty());
1282    }
1283
1284    #[test]
1285    fn walk_type_folder_recurses_shards_and_accepts_abs_or_rel() {
1286        let dir = empty_store();
1287        let root = dir.path();
1288        write(
1289            root,
1290            "sources/emails/2026/05/a.md",
1291            &content_md("2026-05-01T00:00:00Z"),
1292        );
1293        write(
1294            root,
1295            "sources/emails/2026/06/b.md",
1296            &content_md("2026-06-01T00:00:00Z"),
1297        );
1298        write(root, "sources/emails/index.md", "---\ntype: index\n---\n"); // skipped
1299                                                                           // A different type folder must not leak in.
1300        write(
1301            root,
1302            "sources/docs/2026/05/c.md",
1303            &content_md("2026-05-04T00:00:00Z"),
1304        );
1305        let store = open(&dir);
1306
1307        let expected = vec![
1308            "sources/emails/2026/05/a.md".to_string(),
1309            "sources/emails/2026/06/b.md".to_string(),
1310        ];
1311        // Relative folder arg.
1312        assert_eq!(
1313            rels(&store.walk_type_folder(Path::new("sources/emails")).unwrap()),
1314            expected
1315        );
1316        // Absolute folder arg under the store resolves identically.
1317        assert_eq!(
1318            rels(
1319                &store
1320                    .walk_type_folder(&root.join("sources/emails"))
1321                    .unwrap()
1322            ),
1323            expected
1324        );
1325    }
1326
1327    // ── recent_in_type_folder ────────────────────────────────────────────────
1328
1329    #[test]
1330    fn recent_orders_by_updated_desc_then_path_and_caps() {
1331        let dir = empty_store();
1332        let root = dir.path();
1333        // newest
1334        write(
1335            root,
1336            "records/meetings/2026/05/c.md",
1337            &content_md("2026-05-03T00:00:00Z"),
1338        );
1339        // tie on updated — path asc decides (a before b)
1340        write(
1341            root,
1342            "records/meetings/2026/05/a.md",
1343            &content_md("2026-05-02T00:00:00Z"),
1344        );
1345        write(
1346            root,
1347            "records/meetings/2026/05/b.md",
1348            &content_md("2026-05-02T00:00:00Z"),
1349        );
1350        // oldest
1351        write(
1352            root,
1353            "records/meetings/2026/04/z.md",
1354            &content_md("2026-04-01T00:00:00Z"),
1355        );
1356        let store = open(&dir);
1357
1358        let all = rels(
1359            &store
1360                .recent_in_type_folder(Path::new("records/meetings"), 10)
1361                .unwrap(),
1362        );
1363        assert_eq!(
1364            all,
1365            vec![
1366                "records/meetings/2026/05/c.md".to_string(), // newest
1367                "records/meetings/2026/05/a.md".to_string(), // tie, path asc
1368                "records/meetings/2026/05/b.md".to_string(),
1369                "records/meetings/2026/04/z.md".to_string(), // oldest
1370            ]
1371        );
1372
1373        // Cap takes the n most-recent.
1374        let top2 = rels(
1375            &store
1376                .recent_in_type_folder(Path::new("records/meetings"), 2)
1377                .unwrap(),
1378        );
1379        assert_eq!(
1380            top2,
1381            vec![
1382                "records/meetings/2026/05/c.md".to_string(),
1383                "records/meetings/2026/05/a.md".to_string(),
1384            ]
1385        );
1386    }
1387
1388    #[test]
1389    fn recent_sorts_undated_files_last() {
1390        let dir = empty_store();
1391        let root = dir.path();
1392        write(
1393            root,
1394            "records/contacts/dated.md",
1395            &content_md("2026-05-01T00:00:00Z"),
1396        );
1397        // No `updated` field at all.
1398        write(
1399            root,
1400            "records/contacts/undated.md",
1401            "---\ntype: contact\nsummary: x\n---\nbody\n",
1402        );
1403        let store = open(&dir);
1404        let got = rels(
1405            &store
1406                .recent_in_type_folder(Path::new("records/contacts"), 10)
1407                .unwrap(),
1408        );
1409        assert_eq!(
1410            got,
1411            vec![
1412                "records/contacts/dated.md".to_string(),
1413                "records/contacts/undated.md".to_string(),
1414            ],
1415            "a file with a real `updated` must outrank one with none"
1416        );
1417    }
1418
1419    // ── type_shards ──────────────────────────────────────────────────────────
1420
1421    #[test]
1422    fn type_shards_classification() {
1423        let dir = empty_store();
1424        let store = open(&dir);
1425        for t in [
1426            "email",
1427            "transcript",
1428            "pdf-source",
1429            "expense",
1430            "invoice",
1431            "meeting",
1432            "order",
1433            "ticket",
1434            "transaction",
1435        ] {
1436            assert!(store.type_shards(t), "{t} should shard");
1437        }
1438        for t in [
1439            "contact",
1440            "company",
1441            "decision",
1442            "wiki-page",
1443            "index",
1444            "log",
1445            "db-md",
1446            "proposal",
1447        ] {
1448            assert!(!store.type_shards(t), "{t} should stay flat");
1449        }
1450    }
1451
1452    // ── shard_path_for ───────────────────────────────────────────────────────
1453
1454    fn fm_with_extra(key: &str, value: &str) -> Frontmatter {
1455        let mut fm = Frontmatter::default();
1456        fm.extra.insert(
1457            key.to_string(),
1458            serde_norway::Value::String(value.to_string()),
1459        );
1460        fm
1461    }
1462
1463    fn fm_with_created(rfc3339: &str) -> Frontmatter {
1464        Frontmatter {
1465            created: Some(DateTime::parse_from_rfc3339(rfc3339).unwrap()),
1466            ..Default::default()
1467        }
1468    }
1469
1470    #[test]
1471    fn shard_path_uses_primary_date_field_per_type() {
1472        let dir = empty_store();
1473        let store = open(&dir);
1474
1475        // expense.date → records/expenses/<YYYY>/<MM>/
1476        let p = store
1477            .shard_path_for("expense", &fm_with_extra("date", "2026-05-22"), "lunch")
1478            .unwrap();
1479        assert_eq!(p, PathBuf::from("records/expenses/2026/05/lunch.md"));
1480
1481        // email.date → sources/emails/<YYYY>/<MM>/
1482        let p = store
1483            .shard_path_for(
1484                "email",
1485                &fm_with_extra("date", "2026-11-02T09:00:00-07:00"),
1486                "e1",
1487            )
1488            .unwrap();
1489        assert_eq!(p, PathBuf::from("sources/emails/2026/11/e1.md"));
1490
1491        // transcript.recorded_at → sources/transcripts/<YYYY>/<MM>/
1492        let p = store
1493            .shard_path_for(
1494                "transcript",
1495                &fm_with_extra("recorded_at", "2025-01-15T12:00:00Z"),
1496                "t1",
1497            )
1498            .unwrap();
1499        assert_eq!(p, PathBuf::from("sources/transcripts/2025/01/t1.md"));
1500    }
1501
1502    #[test]
1503    fn shard_path_falls_back_to_created() {
1504        let dir = empty_store();
1505        let store = open(&dir);
1506        // meeting with no `date` field but a `created` timestamp.
1507        let p = store
1508            .shard_path_for(
1509                "meeting",
1510                &fm_with_created("2024-07-09T08:30:00-04:00"),
1511                "sync",
1512            )
1513            .unwrap();
1514        assert_eq!(p, PathBuf::from("records/meetings/2024/07/sync.md"));
1515    }
1516
1517    #[test]
1518    fn shard_path_primary_field_wins_over_created() {
1519        let dir = empty_store();
1520        let store = open(&dir);
1521        let mut fm = fm_with_created("2020-01-01T00:00:00Z");
1522        fm.extra.insert(
1523            "date".into(),
1524            serde_norway::Value::String("2026-05-22".into()),
1525        );
1526        let p = store.shard_path_for("expense", &fm, "x").unwrap();
1527        // The primary `date` (2026/05), not `created` (2020/01), drives the shard.
1528        assert_eq!(p, PathBuf::from("records/expenses/2026/05/x.md"));
1529    }
1530
1531    #[test]
1532    fn shard_path_flat_types_have_no_shard_segment() {
1533        let dir = empty_store();
1534        let store = open(&dir);
1535        // A contact has a `created` date, but contacts stay flat.
1536        let p = store
1537            .shard_path_for(
1538                "contact",
1539                &fm_with_created("2026-05-22T00:00:00Z"),
1540                "sarah-chen",
1541            )
1542            .unwrap();
1543        assert_eq!(p, PathBuf::from("records/contacts/sarah-chen.md"));
1544
1545        // wiki-page is flat (no date shard) but still files under a type-folder:
1546        // `wiki/topics/<name>.md`, NEVER flat as `wiki/<name>.md`. A 2-component
1547        // path is invisible to the index/validate type-folder model.
1548        let p = store
1549            .shard_path_for("wiki-page", &Frontmatter::default(), "renewal-theme")
1550            .unwrap();
1551        assert_eq!(p, PathBuf::from("wiki/topics/renewal-theme.md"));
1552    }
1553
1554    /// Regression: a wiki-page written through the toolkit's own path
1555    /// computation must land at a path the index + validate type-folder model
1556    /// accepts. `shard_path_for("wiki-page", …)` previously returned a
1557    /// 2-component `wiki/<file>` path, which `type_folder_of` (in both `index`
1558    /// and `validate`) treats as "no type-folder" — so the page either crashed
1559    /// `Index::on_write` (it tried to create `index.md` inside a file) or was
1560    /// silently dropped from every catalog by `Index::rebuild_all`. The
1561    /// computed path must have 3 components: `<layer>/<type-folder>/<file>`.
1562    #[test]
1563    fn shard_path_wiki_page_is_indexable_three_component_path() {
1564        let dir = empty_store();
1565        let store = open(&dir);
1566        let p = store
1567            .shard_path_for("wiki-page", &Frontmatter::default(), "renewal-theme")
1568            .unwrap();
1569        // First two components are a layer + a non-empty type-folder segment;
1570        // the file is the third. This is exactly the shape `type_folder_of`
1571        // (`comps.len() >= 3`, `comps[0]` a known layer) requires.
1572        let comps: Vec<&str> = p.iter().filter_map(|c| c.to_str()).collect();
1573        assert_eq!(
1574            comps.len(),
1575            3,
1576            "wiki-page path must be <layer>/<type-folder>/<file>, got {p:?}"
1577        );
1578        assert_eq!(comps[0], "wiki", "first component must be the wiki layer");
1579        assert!(
1580            !comps[1].is_empty() && comps[1] != "renewal-theme.md",
1581            "second component must be a real type-folder, not the file: {p:?}"
1582        );
1583        assert!(
1584            comps[2].ends_with(".md"),
1585            "third component must be the .md file: {p:?}"
1586        );
1587    }
1588
1589    #[test]
1590    fn shard_path_preserves_and_adds_md_extension() {
1591        let dir = empty_store();
1592        let store = open(&dir);
1593        let with = store
1594            .shard_path_for("contact", &Frontmatter::default(), "sarah.md")
1595            .unwrap();
1596        let without = store
1597            .shard_path_for("contact", &Frontmatter::default(), "sarah")
1598            .unwrap();
1599        assert_eq!(with, PathBuf::from("records/contacts/sarah.md"));
1600        assert_eq!(without, PathBuf::from("records/contacts/sarah.md"));
1601    }
1602
1603    #[test]
1604    fn shard_path_errors_when_sharding_type_has_no_date() {
1605        let dir = empty_store();
1606        let store = open(&dir);
1607        // expense shards, but no `date` and no `created` → NoShardDate.
1608        let err = store
1609            .shard_path_for("expense", &Frontmatter::default(), "mystery")
1610            .unwrap_err();
1611        match err {
1612            StoreError::NoShardDate { file } => {
1613                assert_eq!(file, PathBuf::from("records/expenses/mystery.md"));
1614            }
1615            other => panic!("expected NoShardDate, got {other:?}"),
1616        }
1617    }
1618
1619    // ── find_links_to ────────────────────────────────────────────────────────
1620
1621    #[test]
1622    fn find_links_to_matches_all_accepted_spellings() {
1623        let dir = empty_store();
1624        let root = dir.path();
1625        let target = "records/contacts/sarah-chen";
1626
1627        // Plain link.
1628        write(
1629            root,
1630            "wiki/people/sarah.md",
1631            &format!("---\ntype: wiki-page\nsummary: s\n---\nSee [[{target}]].\n"),
1632        );
1633        // Link with display text.
1634        write(
1635            root,
1636            "records/meetings/2026/05/m.md",
1637            &format!("---\ntype: meeting\nsummary: s\n---\nWith [[{target}|Sarah]].\n"),
1638        );
1639        // Link with .md extension (accepted, warned by validate).
1640        write(
1641            root,
1642            "wiki/themes/t.md",
1643            &format!("---\ntype: wiki-page\nsummary: s\n---\n[[{target}.md]]\n"),
1644        );
1645        // A catalog/index file also contains the link literally — included.
1646        write(
1647            root,
1648            "records/contacts/index.md",
1649            &format!("---\ntype: index\n---\n- [[{target}]] — Sarah\n"),
1650        );
1651        // No link to the target.
1652        write(
1653            root,
1654            "wiki/people/elena.md",
1655            "---\ntype: wiki-page\nsummary: s\n---\nNo links here.\n",
1656        );
1657        // Short-form link must NOT match the full-path target.
1658        write(
1659            root,
1660            "wiki/people/bob.md",
1661            "---\ntype: wiki-page\nsummary: s\n---\n[[sarah-chen]]\n",
1662        );
1663        // A longer path that merely starts with the target must NOT match
1664        // (boundary correctness): target `sarah-chen` vs `sarah-chen-jr`.
1665        write(
1666            root,
1667            "wiki/people/jr.md",
1668            &format!("---\ntype: wiki-page\nsummary: s\n---\n[[{target}-jr]]\n"),
1669        );
1670
1671        let store = open(&dir);
1672        let got = rels(&store.find_links_to(Path::new(target)).unwrap());
1673        assert_eq!(
1674            got,
1675            vec![
1676                "records/contacts/index.md".to_string(),
1677                "records/meetings/2026/05/m.md".to_string(),
1678                "wiki/people/sarah.md".to_string(),
1679                "wiki/themes/t.md".to_string(),
1680            ]
1681        );
1682    }
1683
1684    #[test]
1685    fn find_links_to_distinguishes_sibling_paths() {
1686        // Two contacts whose paths share a prefix; a link to one must not be
1687        // reported as a link to the other.
1688        let dir = empty_store();
1689        let root = dir.path();
1690        write(
1691            root,
1692            "wiki/a.md",
1693            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah]]\n",
1694        );
1695        write(
1696            root,
1697            "wiki/b.md",
1698            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
1699        );
1700        let store = open(&dir);
1701
1702        assert_eq!(
1703            rels(
1704                &store
1705                    .find_links_to(Path::new("records/contacts/sarah"))
1706                    .unwrap()
1707            ),
1708            vec!["wiki/a.md".to_string()]
1709        );
1710        assert_eq!(
1711            rels(
1712                &store
1713                    .find_links_to(Path::new("records/contacts/sarah-chen"))
1714                    .unwrap()
1715            ),
1716            vec!["wiki/b.md".to_string()]
1717        );
1718    }
1719
1720    // ── find_links_to_any (batch — the O(changed × store) fix) ─────────────────
1721
1722    /// The working-set validate's incoming-linker discovery runs through
1723    /// `find_links_to_any` over the WHOLE changed set in one pass. This pins the
1724    /// batch contract that makes that single-pass behavior correct: the result is
1725    /// the union of incoming linkers across every target, with per-target
1726    /// boundary correctness preserved (no alternation arm bleeds into a
1727    /// prefix-sharing sibling). If a regression reverts the batch finder to a
1728    /// per-object loop, the union below would still hold — but the boundary +
1729    /// union-equivalence assertions are what guard the *correctness* of folding N
1730    /// scans into one regex.
1731    #[test]
1732    fn find_links_to_any_returns_the_union_with_boundary_correctness() {
1733        let dir = empty_store();
1734        let root = dir.path();
1735
1736        // Two distinct targets, each with its own linker.
1737        write(
1738            root,
1739            "wiki/links-sarah.md",
1740            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
1741        );
1742        write(
1743            root,
1744            "wiki/links-acme.md",
1745            "---\ntype: wiki-page\nsummary: s\n---\nDeal with [[records/companies/acme|Acme]].\n",
1746        );
1747        // One file links to BOTH targets — must appear exactly once (deduped),
1748        // proving the per-file early-exit folds multiple-target hits into a
1749        // single result row rather than one row per matched target.
1750        write(
1751            root,
1752            "records/meetings/2026/05/m.md",
1753            "---\ntype: meeting\nsummary: s\n---\n[[records/contacts/sarah-chen]] re \
1754             [[records/companies/acme]]\n",
1755        );
1756        // A prefix-sharing sibling of a target: a link to `sarah-chen-jr` must NOT
1757        // be reported as a link to `sarah-chen` even though the alternation now
1758        // carries `sarah-chen` as one arm.
1759        write(
1760            root,
1761            "wiki/links-jr.md",
1762            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen-jr]]\n",
1763        );
1764        // A file that links to neither requested target.
1765        write(
1766            root,
1767            "wiki/unrelated.md",
1768            "---\ntype: wiki-page\nsummary: s\n---\n[[wiki/themes/spend]]\n",
1769        );
1770
1771        let store = open(&dir);
1772        let targets = vec![
1773            PathBuf::from("records/contacts/sarah-chen"),
1774            PathBuf::from("records/companies/acme"),
1775        ];
1776
1777        let got = rels(&store.find_links_to_any(&targets).unwrap());
1778        assert_eq!(
1779            got,
1780            vec![
1781                "records/meetings/2026/05/m.md".to_string(),
1782                "wiki/links-acme.md".to_string(),
1783                "wiki/links-sarah.md".to_string(),
1784            ],
1785            "batch finder must return the deduped union of linkers across all \
1786             targets, excluding the prefix-sibling and the unrelated file"
1787        );
1788
1789        // Equivalence: the batch result must equal the union of the per-target
1790        // single finder. This is the property the working-set path relies on
1791        // when it folds one-scan-per-object into one scan for the whole set.
1792        let mut union: std::collections::BTreeSet<PathBuf> = std::collections::BTreeSet::new();
1793        for t in &targets {
1794            for linker in store.find_links_to(t).unwrap() {
1795                union.insert(linker);
1796            }
1797        }
1798        assert_eq!(
1799            rels(&union.into_iter().collect::<Vec<_>>()),
1800            got,
1801            "find_links_to_any must equal the union of per-target find_links_to"
1802        );
1803    }
1804
1805    /// An empty target set must scan nothing and find nothing — and crucially
1806    /// must NOT compile to a match-everything empty regex (which would report
1807    /// every `.md` as a linker). This is the empty-working-set fast path the
1808    /// `validate` loop hits when nothing changed.
1809    #[test]
1810    fn find_links_to_any_empty_targets_matches_nothing() {
1811        let dir = empty_store();
1812        let root = dir.path();
1813        write(
1814            root,
1815            "wiki/a.md",
1816            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
1817        );
1818        let store = open(&dir);
1819
1820        assert!(
1821            store.find_links_to_any(&[]).unwrap().is_empty(),
1822            "no targets ⇒ no linkers (an empty pattern must not match every file)"
1823        );
1824        // A set of only empty/non-link targets is likewise a no-op, not a
1825        // match-everything.
1826        assert!(
1827            store
1828                .find_links_to_any(&[PathBuf::from(""), PathBuf::from("./")])
1829                .unwrap()
1830                .is_empty(),
1831            "targets that render to empty link text contribute no alternation arm"
1832        );
1833    }
1834
1835    // ── read_type_index ──────────────────────────────────────────────────────
1836
1837    #[test]
1838    fn read_type_index_parses_records_and_flattens_fields() {
1839        let dir = empty_store();
1840        let root = dir.path();
1841        let jsonl = "\
1842{\"path\":\"records/expenses/2026/05/a.md\",\"type\":\"expense\",\"summary\":\"lunch\",\"tags\":[\"meals\"],\"links\":[\"records/companies/acme\"],\"created\":\"2026-05-01T00:00:00Z\",\"updated\":\"2026-05-01T00:00:00Z\",\"vendor\":\"acme\",\"amount\":42}
1843{\"path\":\"records/expenses/2026/05/b.md\",\"type\":\"expense\",\"summary\":\"taxi\",\"created\":null,\"updated\":null,\"vendor\":\"yellow\"}
1844";
1845        let p = write(root, "records/expenses/index.jsonl", jsonl);
1846        let store = open(&dir);
1847        let recs = store.read_type_index(&store.abs_path(&p)).unwrap();
1848
1849        assert_eq!(recs.len(), 2);
1850        // Sorted by path asc.
1851        assert_eq!(recs[0].path, PathBuf::from("records/expenses/2026/05/a.md"));
1852        assert_eq!(recs[0].type_, "expense");
1853        assert_eq!(recs[0].summary, "lunch");
1854        assert_eq!(recs[0].tags, vec!["meals".to_string()]);
1855        assert_eq!(recs[0].links, vec!["records/companies/acme".to_string()]);
1856        assert!(recs[0].created.is_some());
1857        // Extra (non-typed) frontmatter flattens into `fields`.
1858        assert_eq!(
1859            recs[0].fields.get("vendor"),
1860            Some(&serde_json::json!("acme"))
1861        );
1862        assert_eq!(recs[0].fields.get("amount"), Some(&serde_json::json!(42)));
1863        // Defaults: missing tags/links → empty.
1864        assert!(recs[1].tags.is_empty());
1865        assert!(recs[1].links.is_empty());
1866    }
1867
1868    #[test]
1869    fn read_type_index_last_write_wins_and_skips_blanks() {
1870        let dir = empty_store();
1871        let root = dir.path();
1872        // Same path twice; the second line supersedes the first. A blank line
1873        // in between must be ignored, not error.
1874        let jsonl = "\
1875{\"path\":\"records/contacts/sarah.md\",\"type\":\"contact\",\"summary\":\"old\",\"created\":null,\"updated\":null}
1876
1877{\"path\":\"records/contacts/sarah.md\",\"type\":\"contact\",\"summary\":\"new\",\"created\":null,\"updated\":null}
1878";
1879        let p = write(root, "records/contacts/index.jsonl", jsonl);
1880        let store = open(&dir);
1881        let recs = store.read_type_index(&store.abs_path(&p)).unwrap();
1882        assert_eq!(recs.len(), 1, "duplicate path collapses to one record");
1883        assert_eq!(recs[0].summary, "new", "later line must win");
1884    }
1885
1886    #[test]
1887    fn read_type_index_errors_on_malformed_line() {
1888        let dir = empty_store();
1889        let root = dir.path();
1890        let p = write(root, "records/contacts/index.jsonl", "{not valid json}\n");
1891        let store = open(&dir);
1892        let err = store.read_type_index(&store.abs_path(&p)).unwrap_err();
1893        assert!(matches!(err, StoreError::BadTypeIndex { .. }));
1894    }
1895
1896    // ── find_by_type / find_by_where ─────────────────────────────────────────
1897
1898    fn jsonl_line(path: &str, type_: &str, summary: &str, extra: &str) -> String {
1899        format!(
1900            "{{\"path\":\"{path}\",\"type\":\"{type_}\",\"summary\":\"{summary}\",\"created\":null,\"updated\":null{extra}}}\n"
1901        )
1902    }
1903
1904    #[test]
1905    fn find_by_type_reads_canonical_folder_sidecar() {
1906        let dir = empty_store();
1907        let root = dir.path();
1908        // Canonical folder for `contact` is records/contacts.
1909        write(
1910            root,
1911            "records/contacts/index.jsonl",
1912            &(jsonl_line("records/contacts/sarah.md", "contact", "Sarah", "")
1913                + &jsonl_line("records/contacts/elena.md", "contact", "Elena", "")),
1914        );
1915        // A different type's sidecar must not leak into a contact query.
1916        write(
1917            root,
1918            "records/companies/index.jsonl",
1919            &jsonl_line("records/companies/acme.md", "company", "Acme", ""),
1920        );
1921        let store = open(&dir);
1922        let recs = store.find_by_type("contact").unwrap();
1923        let names: Vec<_> = recs.iter().map(|r| r.summary.clone()).collect();
1924        assert_eq!(names, vec!["Elena".to_string(), "Sarah".to_string()]); // path-sorted
1925        assert!(recs.iter().all(|r| r.type_ == "contact"));
1926    }
1927
1928    #[test]
1929    fn find_by_type_canonical_absent_falls_back_within_the_layer_only() {
1930        let dir = empty_store();
1931        let root = dir.path();
1932        // A custom `proposal` record filed under a non-canonical folder NAME
1933        // (the natural plural `records/proposals/`) inside the records layer.
1934        // `default_type_folder("proposal")` = `records/proposal` (bare type, no
1935        // pluralization guess), so the canonical sidecar does not exist and
1936        // `find_by_type` falls back. The fallback is bounded to the type's
1937        // layer (records), so this record — same layer, non-canonical folder —
1938        // is still found: completeness within the layer holds.
1939        write(
1940            root,
1941            "records/proposals/index.jsonl",
1942            &jsonl_line("records/proposals/p1.md", "proposal", "Q3 proposal", ""),
1943        );
1944        // A DECOY of the SAME type sitting in a DIFFERENT layer (sources/). The
1945        // old whole-store fallback read every sidecar in the store and would
1946        // have leaked this into the result; the layer-bounded fallback must not.
1947        // It also pins that the fallback is O(entities-in-layer), never O(store).
1948        write(
1949            root,
1950            "sources/proposals/index.jsonl",
1951            &jsonl_line(
1952                "sources/proposals/leak.md",
1953                "proposal",
1954                "cross-layer decoy",
1955                "",
1956            ),
1957        );
1958        let store = open(&dir);
1959        let recs = store.find_by_type("proposal").unwrap();
1960        assert_eq!(
1961            recs.len(),
1962            1,
1963            "only the records-layer proposal, not the sources decoy"
1964        );
1965        assert_eq!(recs[0].summary, "Q3 proposal");
1966        assert_eq!(recs[0].path, PathBuf::from("records/proposals/p1.md"));
1967    }
1968
1969    #[test]
1970    fn find_by_type_canonical_absent_does_not_read_other_layers() {
1971        let dir = empty_store();
1972        let root = dir.path();
1973        // `email`'s canonical folder is `sources/emails` (layer Sources). No
1974        // sidecar there yet, so `find_by_type("email")` falls back — but only
1975        // within the Sources layer. A populated sidecar in the Records layer
1976        // must never be touched: the fallback is layer-bounded, not store-wide.
1977        // Under the old `read_all_type_indexes_in(None)` fallback this records
1978        // sidecar would have been read and filtered (wasted O(store) I/O); now
1979        // it is outside the walk root entirely.
1980        write(
1981            root,
1982            "records/contacts/index.jsonl",
1983            &jsonl_line("records/contacts/sarah.md", "contact", "Sarah", ""),
1984        );
1985        let store = open(&dir);
1986        // No email anywhere ⇒ empty, and the records layer was not in scope.
1987        assert!(store.find_by_type("email").unwrap().is_empty());
1988    }
1989
1990    #[test]
1991    fn find_by_where_matches_typed_columns_and_flat_fields() {
1992        let dir = empty_store();
1993        let root = dir.path();
1994        write(
1995            root,
1996            "records/expenses/index.jsonl",
1997            &(jsonl_line(
1998                "records/expenses/a.md",
1999                "expense",
2000                "lunch",
2001                ",\"vendor\":\"acme\",\"tags\":[\"meals\"]",
2002            ) + &jsonl_line(
2003                "records/expenses/b.md",
2004                "expense",
2005                "taxi",
2006                ",\"vendor\":\"yellow\"",
2007            )),
2008        );
2009        write(
2010            root,
2011            "records/contacts/index.jsonl",
2012            &jsonl_line(
2013                "records/contacts/sarah.md",
2014                "contact",
2015                "Sarah",
2016                ",\"tags\":[\"customer\"]",
2017            ),
2018        );
2019        let store = open(&dir);
2020
2021        // Flat field in `fields`.
2022        let by_vendor = store.find_by_where("vendor", "acme").unwrap();
2023        assert_eq!(by_vendor.len(), 1);
2024        assert_eq!(by_vendor[0].path, PathBuf::from("records/expenses/a.md"));
2025
2026        // Typed column: type (spans both expense records).
2027        assert_eq!(store.find_by_where("type", "expense").unwrap().len(), 2);
2028
2029        // Typed list column: tags membership.
2030        let customers = store.find_by_where("tags", "customer").unwrap();
2031        assert_eq!(customers.len(), 1);
2032        assert_eq!(
2033            customers[0].path,
2034            PathBuf::from("records/contacts/sarah.md")
2035        );
2036
2037        // No match → empty.
2038        assert!(store.find_by_where("vendor", "nobody").unwrap().is_empty());
2039    }
2040
2041    #[test]
2042    fn find_by_where_matches_timestamps_across_rfc3339_spellings() {
2043        let dir = empty_store();
2044        let root = dir.path();
2045        // db.md files most commonly carry the `Z` UTC spelling. The index.jsonl
2046        // serialized from such a file preserves it verbatim.
2047        write(
2048            root,
2049            "records/meetings/index.jsonl",
2050            "{\"path\":\"records/meetings/kickoff.md\",\"type\":\"meeting\",\
2051\"summary\":\"kickoff\",\"created\":\"2026-05-01T00:00:00Z\",\
2052\"updated\":\"2026-05-02T09:30:00-07:00\"}\n",
2053        );
2054        let store = open(&dir);
2055
2056        // The exact value an agent reads out of the file (`Z` form) must match.
2057        let by_z = store
2058            .find_by_where("created", "2026-05-01T00:00:00Z")
2059            .unwrap();
2060        assert_eq!(by_z.len(), 1);
2061        assert_eq!(by_z[0].path, PathBuf::from("records/meetings/kickoff.md"));
2062
2063        // The equivalent explicit-offset spelling of the same instant matches too.
2064        assert_eq!(
2065            store
2066                .find_by_where("created", "2026-05-01T00:00:00+00:00")
2067                .unwrap()
2068                .len(),
2069            1
2070        );
2071
2072        // A non-UTC stored value matches both its own offset spelling and the
2073        // same instant expressed as `Z` (instant comparison, not string compare).
2074        assert_eq!(
2075            store
2076                .find_by_where("updated", "2026-05-02T09:30:00-07:00")
2077                .unwrap()
2078                .len(),
2079            1
2080        );
2081        assert_eq!(
2082            store
2083                .find_by_where("updated", "2026-05-02T16:30:00Z")
2084                .unwrap()
2085                .len(),
2086            1
2087        );
2088
2089        // A different instant does not match.
2090        assert!(store
2091            .find_by_where("created", "2026-05-01T00:00:01Z")
2092            .unwrap()
2093            .is_empty());
2094        // A non-RFC3339 query value never matches a real timestamp.
2095        assert!(store
2096            .find_by_where("created", "2026-05-01")
2097            .unwrap()
2098            .is_empty());
2099    }
2100
2101    #[test]
2102    fn find_by_where_in_layer_reads_only_that_layers_sidecars() {
2103        // The O(entities-in-layer) contract: a layer-scoped where read must walk
2104        // ONLY the named layer's subtree. Proven structurally — a *malformed*
2105        // sidecar in another layer would make `read_type_index` error if it were
2106        // read, so a scoped read that succeeds (and excludes that record) is
2107        // proof the other layer's I/O never happened.
2108        let dir = empty_store();
2109        let root = dir.path();
2110        write(
2111            root,
2112            "records/companies/index.jsonl",
2113            &jsonl_line(
2114                "records/companies/acme.md",
2115                "company",
2116                "Acme",
2117                ",\"domain\":\"acme.com\"",
2118            ),
2119        );
2120        // Same field/value in the sources layer — but the sidecar is corrupt.
2121        write(
2122            root,
2123            "sources/emails/index.jsonl",
2124            "{ this is not valid json and would error if read }\n",
2125        );
2126        let store = open(&dir);
2127
2128        // Scoped to records: the corrupt sources sidecar is out of scope, so the
2129        // read succeeds and returns only the records-layer match.
2130        let in_records = store
2131            .find_by_where_in("domain", "acme.com", Some(Layer::Records))
2132            .expect("a records-scoped read must not touch the sources sidecar");
2133        assert_eq!(
2134            rels(
2135                &in_records
2136                    .iter()
2137                    .map(|r| r.path.clone())
2138                    .collect::<Vec<_>>()
2139            ),
2140            vec!["records/companies/acme.md".to_string()]
2141        );
2142
2143        // The store-wide read DOES reach the corrupt sidecar and surfaces it as
2144        // a parse error — confirming the corrupt file is genuinely in the tree
2145        // and that only the layer scope spares it.
2146        let store_wide = store.find_by_where("domain", "acme.com");
2147        assert!(
2148            matches!(store_wide, Err(StoreError::BadTypeIndex { .. })),
2149            "unscoped read walks every layer and hits the corrupt sidecar"
2150        );
2151
2152        // Scoping to the layer that holds only the corrupt sidecar still errors
2153        // (the scope includes it), proving the scope is a real subtree bound and
2154        // not a silent "skip anything that fails".
2155        let in_sources = store.find_by_where_in("domain", "acme.com", Some(Layer::Sources));
2156        assert!(matches!(in_sources, Err(StoreError::BadTypeIndex { .. })));
2157    }
2158
2159    #[test]
2160    fn find_by_where_in_missing_layer_is_empty_not_an_error() {
2161        // A layer-scoped read over a layer folder that does not exist yet must
2162        // return empty (mirrors `walk_layer`'s missing-dir guard), never a walk
2163        // error from `ignore` over a nonexistent path.
2164        let dir = empty_store();
2165        let root = dir.path();
2166        write(
2167            root,
2168            "records/contacts/index.jsonl",
2169            &jsonl_line(
2170                "records/contacts/sarah.md",
2171                "contact",
2172                "Sarah",
2173                ",\"city\":\"denver\"",
2174            ),
2175        );
2176        let store = open(&dir);
2177
2178        // `wiki/` was never created.
2179        let in_wiki = store
2180            .find_by_where_in("city", "denver", Some(Layer::Wiki))
2181            .expect("missing layer subtree is empty, not an error");
2182        assert!(in_wiki.is_empty());
2183
2184        // Same query scoped to the layer that has the record still finds it.
2185        let in_records = store
2186            .find_by_where_in("city", "denver", Some(Layer::Records))
2187            .unwrap();
2188        assert_eq!(in_records.len(), 1);
2189    }
2190
2191    // ── abs_path / rel_path ──────────────────────────────────────────────────
2192
2193    #[test]
2194    fn abs_and_rel_path_roundtrip() {
2195        let dir = empty_store();
2196        let store = open(&dir);
2197        let rel = Path::new("records/contacts/sarah.md");
2198        let abs = store.abs_path(rel);
2199        assert_eq!(abs, dir.path().join(rel));
2200        assert_eq!(store.rel_path(&abs).as_deref(), Some(rel));
2201
2202        // An absolute path is passed through unchanged by abs_path.
2203        assert_eq!(store.abs_path(&abs), abs);
2204
2205        // A path outside the store has no store-relative form.
2206        assert_eq!(store.rel_path(Path::new("/somewhere/else.md")), None);
2207    }
2208
2209    // ── infer_type_from_path (inverse of default_type_folder) ────────────────
2210
2211    #[test]
2212    fn infer_type_maps_every_recognized_folder_back_to_its_type() {
2213        let cases = [
2214            ("sources/emails/x.md", "email"),
2215            ("sources/transcripts/x.md", "transcript"),
2216            ("sources/docs/x.md", "pdf-source"),
2217            ("records/contacts/x.md", "contact"),
2218            ("records/companies/x.md", "company"),
2219            ("records/expenses/x.md", "expense"),
2220            ("records/meetings/x.md", "meeting"),
2221            ("records/decisions/x.md", "decision"),
2222            ("records/invoices/x.md", "invoice"),
2223            // Any wiki sub-folder infers `wiki-page` regardless of the topic name.
2224            ("wiki/topics/x.md", "wiki-page"),
2225            ("wiki/pricing/x.md", "wiki-page"),
2226        ];
2227        for (path, expected) in cases {
2228            assert_eq!(
2229                infer_type_from_path(Path::new(path)).as_deref(),
2230                Some(expected),
2231                "path {path} should infer type {expected}"
2232            );
2233        }
2234    }
2235
2236    #[test]
2237    fn infer_type_round_trips_with_default_type_folder() {
2238        // The canonical invariant: inference is the inverse of the forward map.
2239        // Every recognized type, routed through `default_type_folder` and then
2240        // back through `infer_type_from_path`, must return the original type.
2241        // `wiki-page` is the one many-to-one case (every topic folder maps back
2242        // to `wiki-page`), so its forward folder still round-trips.
2243        let recognized = [
2244            "email",
2245            "transcript",
2246            "pdf-source",
2247            "contact",
2248            "company",
2249            "expense",
2250            "meeting",
2251            "decision",
2252            "invoice",
2253            "wiki-page",
2254        ];
2255        for type_ in recognized {
2256            let folder = default_type_folder(type_);
2257            let file = folder.join("x.md");
2258            assert_eq!(
2259                infer_type_from_path(&file).as_deref(),
2260                Some(type_),
2261                "recognized type {type_} (folder {folder:?}) must round-trip"
2262            );
2263        }
2264    }
2265
2266    #[test]
2267    fn infer_type_round_trips_custom_types_verbatim_no_singularization() {
2268        // Regression guard for the CLI/core divergence: `default_type_folder`'s
2269        // unrecognized fallback is the BARE type name (`task → records/task`,
2270        // `tasks → records/tasks`). Inference must NOT singularize, or a custom
2271        // type would not round-trip (e.g. `records/tasks` → `task` would clash
2272        // with `default_type_folder("task") → records/task`).
2273        for custom in ["task", "tasks", "playbook", "process", "okrs", "ticket"] {
2274            let folder = default_type_folder(custom);
2275            assert_eq!(folder, PathBuf::from("records").join(custom));
2276            let file = folder.join("x.md");
2277            assert_eq!(
2278                infer_type_from_path(&file).as_deref(),
2279                Some(custom),
2280                "custom type {custom} must round-trip verbatim (no singularization)"
2281            );
2282        }
2283
2284        // The specific case named in the finding: a plural custom folder keeps
2285        // its trailing `s`; it is NOT singularized to `task`.
2286        assert_eq!(
2287            infer_type_from_path(Path::new("records/tasks/x.md")).as_deref(),
2288            Some("tasks"),
2289            "records/tasks must infer `tasks`, not `task`"
2290        );
2291    }
2292
2293    #[test]
2294    fn infer_type_requires_three_component_layer_folder_file_shape() {
2295        // Fewer than 3 components: a file directly under a layer has no
2296        // type-folder, so inference yields None (matches the old CLI contract).
2297        assert_eq!(infer_type_from_path(Path::new("records/x.md")), None);
2298        assert_eq!(infer_type_from_path(Path::new("sources/x.md")), None);
2299        assert_eq!(infer_type_from_path(Path::new("wiki/x.md")), None);
2300        assert_eq!(infer_type_from_path(Path::new("x.md")), None);
2301        // Unknown leading layer is never inferred.
2302        assert_eq!(infer_type_from_path(Path::new("foo/bar/x.md")), None);
2303        // Deeper paths still infer from the first type-folder segment (e.g. a
2304        // sharded record under records/expenses/2026/05/x.md).
2305        assert_eq!(
2306            infer_type_from_path(Path::new("records/expenses/2026/05/x.md")).as_deref(),
2307            Some("expense"),
2308        );
2309    }
2310}