Skip to main content

dbmd_core/
store.rs

1//! `store` — walk, locate, and shard a db.md store.
2//!
3//! A db.md store is one directory marked by an uppercase `DB.md` at its root.
4//! [`Store::open`] is the single gate every store-walking subcommand goes
5//! through; a missing `DB.md` is the [`NotAStore`] error (`NOT_A_STORE`). The
6//! toolkit never guesses a store root.
7//!
8//! Scale discipline lives here: [`Store::walk`] and the layer/type-folder
9//! walks are **SWEEP** primitives used only by `validate --all`,
10//! `index rebuild`, and `stats`. The interactive loop instead uses
11//! [`Store::find_links_to`] / [`Store::find_links_to_any`] (embedded ripgrep,
12//! presence-only) and the `index.jsonl` sidecar readers
13//! ([`Store::find_by_type`] / [`Store::find_by_where`] /
14//! [`Store::read_type_index`]) — never a whole-store parse. The batch
15//! [`Store::find_links_to_any`] is what keeps the working-set validate's
16//! incoming-linker discovery a single store scan rather than one scan per
17//! changed object.
18
19use std::collections::BTreeMap;
20use std::path::{Path, PathBuf};
21
22use chrono::{DateTime, Datelike, FixedOffset};
23use grep::regex::RegexMatcher;
24use grep::searcher::sinks::UTF8;
25use grep::searcher::Searcher;
26use ignore::WalkBuilder;
27
28use crate::index::IndexRecord;
29use crate::parser::{parse_db_md, Config, Frontmatter};
30
/// Basenames that are never content files: the `DB.md` config marker and the
/// two curator-maintained catalogs (`index.md`, `log.md`). The content walks
/// skip files with these names so a SWEEP over the content layers never
/// mistakes a catalog for a record. Matching is by exact basename.
const NON_CONTENT_BASENAMES: [&str; 3] = ["DB.md", "index.md", "log.md"];

/// Basename of the machine-twin sidecar that backs every structured read.
/// Both the canonical type-folder lookup and the sidecar discovery walk match
/// on this exact name.
const TYPE_INDEX_FILE: &str = "index.jsonl";
38
/// Returned when a path is opened as a store but has no `DB.md` at its root.
/// Surfaced as the structured code `NOT_A_STORE` with a non-zero exit.
///
/// This is the only error `Store::open` produces: a `DB.md` that exists but
/// cannot be read or parsed does not yield `NotAStore`.
#[derive(Debug, thiserror::Error)]
#[error("not a db.md store: {path} has no DB.md")]
pub struct NotAStore {
    /// The path that was inspected (as the caller supplied it).
    pub path: PathBuf,
}
47
/// Errors from store-level operations (walk, locate, shard, sidecar read).
///
/// Each variant carries the path it concerns so a caller can report which
/// file or subtree failed without re-deriving it.
#[derive(Debug, thiserror::Error)]
pub enum StoreError {
    /// A sidecar `index.jsonl` could not be read or parsed. For a parse
    /// failure the message includes the 1-based line number of the bad line.
    #[error("failed to read type index {path}: {message}")]
    BadTypeIndex {
        /// The sidecar file.
        path: PathBuf,
        /// What went wrong.
        message: String,
    },

    /// A required date field for sharding was absent or unparseable, and there
    /// was no usable fallback. Raised only for types that date-shard.
    #[error("cannot compute shard path for {file}: no usable date field")]
    NoShardDate {
        /// The file being placed (type folder joined with the file name,
        /// without any shard segment).
        file: PathBuf,
    },

    /// An embedded-ripgrep scan failed to start or run. Also used for
    /// directory-walk errors raised while enumerating files.
    #[error("search failed under {root}: {message}")]
    Search {
        /// The root the scan ran under.
        root: PathBuf,
        /// What went wrong.
        message: String,
    },

    /// An underlying I/O failure, converted automatically via `?`.
    #[error(transparent)]
    Io(#[from] std::io::Error),
}
81
/// The three canonical layers of a db.md store.
///
/// Variants are declared in canonical order and the derived
/// `PartialOrd`/`Ord` follow that declaration order
/// (`Sources` < `Records` < `Wiki`). The ordering traits exist because sibling
/// modules key `BTreeMap`s on `Layer` (e.g. `stats::Stats::files_per_layer`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Layer {
    /// `sources/` — raw evidence; immutable; date-sharded at scale.
    Sources,
    /// `records/` — atomic typed data; entity types flat, event types sharded.
    Records,
    /// `wiki/` — curator-synthesized narrative; flat.
    Wiki,
}

impl Layer {
    /// On-disk folder name of this layer: `"sources"`, `"records"`, or
    /// `"wiki"`.
    pub fn dir_name(self) -> &'static str {
        match self {
            Self::Sources => "sources",
            Self::Records => "records",
            Self::Wiki => "wiki",
        }
    }

    /// Inverse of [`Layer::dir_name`]: the layer whose folder name is exactly
    /// `name` (case-sensitive), or `None` when no layer uses that name.
    pub fn from_dir_name(name: &str) -> Option<Self> {
        Self::all()
            .iter()
            .copied()
            .find(|layer| layer.dir_name() == name)
    }

    /// All layers, in canonical order (`Sources`, `Records`, `Wiki`).
    pub fn all() -> [Layer; 3] {
        [Self::Sources, Self::Records, Self::Wiki]
    }
}
123
/// An opened db.md store: its root path plus the parsed `DB.md` [`Config`].
///
/// Construct via [`Store::open`]; that is the only path in, and it validates
/// the `DB.md` marker so downstream code can assume a real store. Note that
/// `open` falls back to `Config::default()` when `DB.md` exists but cannot be
/// read or parsed, so `config` may not reflect the file's contents.
#[derive(Debug, Clone)]
pub struct Store {
    /// The store root (the directory containing `DB.md`), stored exactly as
    /// it was passed to [`Store::open`].
    pub root: PathBuf,
    /// The parsed `DB.md` config (agent instructions, policies, schemas).
    pub config: Config,
}
135
136impl Store {
137    /// True if `path` is a db.md store root: an uppercase `DB.md` file exists
138    /// at `path`. On case-sensitive filesystems a lowercase `db.md` must NOT
139    /// count (the lowercase name refers to the project/spec, not the marker).
140    pub fn is_db_md_store(path: &Path) -> bool {
141        // Read the directory and match the *stored* filename byte-for-byte.
142        // `path.join("DB.md").exists()` would lie on a case-insensitive
143        // filesystem (macOS default), where a lowercase `db.md` answers a
144        // `DB.md` probe. `read_dir` returns the real on-disk name, so the
145        // exact-match check is correct on both case-sensitive (Linux) and
146        // case-insensitive filesystems.
147        let entries = match std::fs::read_dir(path) {
148            Ok(entries) => entries,
149            Err(_) => return false,
150        };
151        for entry in entries.flatten() {
152            if entry.file_name() == "DB.md" {
153                // A directory literally named `DB.md` is not the marker.
154                match entry.file_type() {
155                    Ok(ft) if ft.is_dir() => return false,
156                    Ok(_) => return true,
157                    Err(_) => return false,
158                }
159            }
160        }
161        false
162    }
163
164    /// Open `path` as a db.md store: confirm the `DB.md` marker (else
165    /// [`NotAStore`]) and parse the `DB.md` config. Every store-walking
166    /// subcommand opens through here.
167    pub fn open(path: &Path) -> Result<Store, NotAStore> {
168        if !Store::is_db_md_store(path) {
169            return Err(NotAStore {
170                path: path.to_path_buf(),
171            });
172        }
173        let db_md = path.join("DB.md");
174        // The marker exists; parse its config. A read or parse failure leaves
175        // the store openable with default config rather than masquerading as
176        // NOT_A_STORE — the marker is present, so this *is* a store; a damaged
177        // DB.md is `dbmd validate`'s job to report, not `open`'s.
178        let config = match std::fs::read_to_string(&db_md) {
179            Ok(text) => parse_db_md(&text, &db_md).unwrap_or_default(),
180            Err(_) => Config::default(),
181        };
182        Ok(Store {
183            root: path.to_path_buf(),
184            config,
185        })
186    }
187
188    /// **SWEEP.** Recursively iterate every `.md` content file across
189    /// `sources/`, `records/`, and `wiki/`, skipping hidden dirs and `log/`.
190    /// Used only by `validate --all`, `index rebuild`, and `stats` — never on
191    /// the interactive loop.
192    pub fn walk(&self) -> Result<Vec<PathBuf>, StoreError> {
193        // Only the three content layers — never root meta files (`DB.md`,
194        // `index.md`, `log.md`) and never `log/`, which live at root and are
195        // outside every layer dir.
196        let mut out = Vec::new();
197        for layer in Layer::all() {
198            out.extend(self.walk_layer(layer)?);
199        }
200        out.sort();
201        Ok(out)
202    }
203
204    /// **SWEEP.** Like [`Store::walk`] but scoped to a single layer.
205    pub fn walk_layer(&self, layer: Layer) -> Result<Vec<PathBuf>, StoreError> {
206        let layer_root = self.root.join(layer.dir_name());
207        if !layer_root.is_dir() {
208            return Ok(Vec::new());
209        }
210        self.walk_content_md(&layer_root)
211    }
212
213    /// Enumerate every `.md` file in a single type-folder, **recursing through
214    /// its date-shards** (`sources/emails/**/*.md`). The unit the index builder
215    /// and per-folder rebuild operate on. SWEEP-class (scoped to one folder).
216    pub fn walk_type_folder(&self, type_folder: &Path) -> Result<Vec<PathBuf>, StoreError> {
217        let abs = self.resolve_under_root(type_folder);
218        if !abs.is_dir() {
219            return Ok(Vec::new());
220        }
221        self.walk_content_md(&abs)
222    }
223
224    /// The ≤`n` most-recent files in a type-folder by frontmatter `updated`
225    /// (descending), ties broken by store-relative path (ascending) — a total
226    /// order, so write-through and rebuild never disagree on #500 vs #501.
227    ///
228    /// Reads `updated` across the folder's shards — a SWEEP cost absorbed into
229    /// `index rebuild`. The write-through path never calls this. The
230    /// cap-selection primitive for the 500-entry `index.md` browse view.
231    pub fn recent_in_type_folder(
232        &self,
233        type_folder: &Path,
234        n: usize,
235    ) -> Result<Vec<PathBuf>, StoreError> {
236        let files = self.walk_type_folder(type_folder)?;
237        // (updated, rel-path) for each file. Files missing/unparseable
238        // `updated` sort *after* dated ones (None last), then by path — so they
239        // are deterministically the lowest-priority candidates for the cap, not
240        // dropped silently. The total order (updated desc, path asc) is what
241        // keeps write-through and rebuild agreeing on #500 vs #501.
242        let mut keyed: Vec<(Option<DateTime<FixedOffset>>, PathBuf)> = files
243            .into_iter()
244            .map(|rel| {
245                let updated = self.read_updated(&self.abs_path(&rel));
246                (updated, rel)
247            })
248            .collect();
249        keyed.sort_by(|a, b| {
250            // `updated` descending: newest first. `None` is treated as the
251            // oldest possible, so dated files always win a cap slot over
252            // undated ones.
253            let by_updated = b.0.cmp(&a.0);
254            by_updated.then_with(|| a.1.cmp(&b.1))
255        });
256        keyed.truncate(n);
257        Ok(keyed.into_iter().map(|(_, rel)| rel).collect())
258    }
259
260    /// The shard/flat predicate: true if the type date-shards, false if it
261    /// stays flat. True for source types and event record types
262    /// (`expense`/`invoice`/`meeting` + custom `order`/`ticket`/`transaction`),
263    /// or when `DB.md ## Schemas` declares `shard: by-date`. False for
264    /// dedup-bounded entity types (`contact`/`company`/`decision`) and `wiki/`.
265    pub fn type_shards(&self, type_: &str) -> bool {
266        // Built-in classification. Sharding is a property of the *type*:
267        //  - source types carry a primary date field and shard;
268        //  - event record types track business volume and shard;
269        //  - dedup-bounded entity types and curation-bounded wiki stay flat.
270        // NOTE: the SPEC's `DB.md ## Schemas` `shard: by-date` override has no
271        // representation in the frozen `Schema`/`FieldSpec` types (no shard
272        // flag), so it cannot be consulted here yet — see the store findings.
273        matches!(
274            type_,
275            // source types
276            "email" | "transcript" | "pdf-source"
277            // event record types (canonical)
278            | "expense" | "invoice" | "meeting"
279            // event record types (recognized custom, per the plan)
280            | "order" | "ticket" | "transaction"
281        )
282    }
283
284    /// Compute the canonical write path for a new file. For a sharding type
285    /// (per [`Store::type_shards`]) insert `<YYYY>/<MM>/` from the type's
286    /// primary date field (`email.date`, `expense.date`, … fallback `created`)
287    /// under the type folder; flat types and `wiki/` get no shard segment.
288    /// Deterministic + stable: same input → same path, so a record never moves
289    /// once written.
290    pub fn shard_path_for(
291        &self,
292        type_: &str,
293        frontmatter: &Frontmatter,
294        name: &str,
295    ) -> Result<PathBuf, StoreError> {
296        self.shard_path_in(&default_type_folder(type_), type_, frontmatter, name)
297    }
298
299    /// Like [`Store::shard_path_for`], but compute the path under an explicit,
300    /// caller-resolved type-folder rather than the canonical default. This lets a
301    /// write surface honour an agent-supplied conforming sub-folder — e.g.
302    /// `wiki/projects/`, `wiki/people/`, `wiki/synthesis/` (the SPEC files a
303    /// `wiki-page` under `wiki/<topic>/`, i.e. ANY topic sub-folder, not only the
304    /// `wiki/topics` default) — while still applying date-sharding for sharding
305    /// types. The folder must be a conforming `<layer>/<type-folder>` (2
306    /// components, recognized layer); the caller is responsible for that (see the
307    /// CLI's `resolve_write_path`), so it is taken as given here.
308    ///
309    /// Sharding is still a property of the *type*: a sharding type gets the
310    /// `<YYYY>/<MM>` segment under `folder`; a flat type lands directly in it.
311    pub fn shard_path_in(
312        &self,
313        folder: &Path,
314        type_: &str,
315        frontmatter: &Frontmatter,
316        name: &str,
317    ) -> Result<PathBuf, StoreError> {
318        let folder = folder.to_path_buf();
319        let filename = ensure_md_extension(name);
320
321        if !self.type_shards(type_) {
322            // Flat type (entity records, wiki, decisions): no shard segment.
323            return Ok(folder.join(filename));
324        }
325
326        // Sharding type: derive <YYYY>/<MM> from the primary date field, with
327        // `created` as the universal fallback. Reading the public `Frontmatter`
328        // fields directly (typed `created`/`updated` + raw `extra`) avoids the
329        // not-yet-implemented `Frontmatter::get`/`parse` and keeps this pure.
330        let (year, month) = self
331            .primary_shard_segment(type_, frontmatter)
332            .ok_or_else(|| StoreError::NoShardDate {
333                file: folder.join(&filename),
334            })?;
335
336        Ok(folder.join(year).join(month).join(filename))
337    }
338
339    /// Find files with an incoming wiki-link to `target`, via **embedded
340    /// ripgrep** for `[[target]]` across all layers. Loop-fast; no whole-graph
341    /// build. Returns store-relative paths.
342    pub fn find_links_to(&self, target: &Path) -> Result<Vec<PathBuf>, StoreError> {
343        // A single target is just the degenerate batch case — one alternation
344        // arm, one store scan. Routing through `find_links_to_any` keeps the
345        // pattern construction and the scan loop in exactly one place. The
346        // batch API takes `&[PathBuf]`, so the one-element slice is owned (a
347        // single alloc on this single-target convenience path; the batch path
348        // validate.rs rides is untouched).
349        self.find_links_to_any(&[target.to_path_buf()])
350    }
351
352    /// Find every file with an incoming wiki-link to **any** of `targets`, in a
353    /// **single embedded-ripgrep pass** over the store (one `.md` walk, one
354    /// presence-only scan per file). This is the batch incoming-linker finder the
355    /// working-set [`crate::validate::validate_working_set`] sits on: it must find
356    /// the linkers for the *whole* changed set without paying a full store read
357    /// per changed object. Cost is therefore one store scan (O(store)), NOT
358    /// `targets.len() × store` — calling [`find_links_to`](Self::find_links_to)
359    /// in a loop would reread every `.md` once per target and is the exact
360    /// `O(changed × store)` blow-up this method exists to prevent. Returns
361    /// store-relative paths (deduped, sorted).
362    ///
363    /// Why content scan and not the sidecar `links` field: the sidecar projects
364    /// only the frontmatter `links:` array, so it misses edges written in the
365    /// body or in typed fields (`company: [[…]]`). Finding an incoming link to an
366    /// arbitrary path therefore requires reading file content — the same reason
367    /// the single-target finder uses ripgrep.
368    pub fn find_links_to_any(&self, targets: &[PathBuf]) -> Result<Vec<PathBuf>, StoreError> {
369        // The wiki-link doctrine: a link is the full store-relative path, no
370        // `.md` extension. A reference to a target therefore appears literally
371        // as `[[<target>]]`, optionally with a `|display` suffix and (warned
372        // but accepted) a trailing `.md`. Build ONE regex that matches all
373        // accepted spellings of an incoming link to ANY target, escaping each
374        // target so path separators / dots stay literal and the alternation
375        // arms keep their boundaries (a link to `sarah` never matches
376        // `sarah-chen`).
377        let mut arms: Vec<String> = Vec::new();
378        for target in targets {
379            let target_str = path_to_link_str(target);
380            if target_str.is_empty() {
381                continue;
382            }
383            // [[ <target> (.md)? ( | display )? ]]
384            arms.push(format!(
385                r"\[\[{}(\.md)?(\|[^\]]*)?\]\]",
386                regex::escape(&target_str)
387            ));
388        }
389        // No usable targets → no possible incoming links, and an empty pattern
390        // would compile to a match-everything regex. Short-circuit instead.
391        if arms.is_empty() {
392            return Ok(Vec::new());
393        }
394        let pattern = arms.join("|");
395
396        let matcher = RegexMatcher::new(&pattern).map_err(|e| StoreError::Search {
397            root: self.root.clone(),
398            message: format!("invalid backlink pattern: {e}"),
399        })?;
400
401        let mut hits = std::collections::BTreeSet::new();
402        // Scan every `.md` file in the store (skip hidden + `log/`), including
403        // `index.md` catalogs — an incoming reference is wherever the literal
404        // link text lives; the caller decides relevance. ONE walk for the whole
405        // target set; per file we stop at the first hit (presence is all we
406        // need), so a file that links to several targets is read once, not once
407        // per target.
408        for rel in self.walk_all_md()? {
409            let abs = self.abs_path(&rel);
410            let mut matched_here = false;
411            let mut searcher = Searcher::new();
412            let res = searcher.search_path(
413                &matcher,
414                &abs,
415                UTF8(|_lnum, _line| {
416                    matched_here = true;
417                    // Stop at the first hit: presence is all we need.
418                    Ok(false)
419                }),
420            );
421            if let Err(e) = res {
422                return Err(StoreError::Search {
423                    root: self.root.clone(),
424                    message: format!("search failed in {}: {e}", abs.display()),
425                });
426            }
427            if matched_here {
428                hits.insert(rel);
429            }
430        }
431        Ok(hits.into_iter().collect())
432    }
433
434    /// Candidate set for a `type` query: read the relevant type-folder
435    /// `index.jsonl` sidecar(s) and return their records. Complete and
436    /// cold-cache-proof — NOT a walk-and-parse or a frontmatter ripgrep scan,
437    /// and **never a store-wide read**. The common path is one sequential read
438    /// of the canonical type-folder sidecar (O(entities)); when that sidecar is
439    /// absent the read is bounded to the type's single layer subtree
440    /// (O(entities-in-layer)), so a `--type proposal` query before that folder
441    /// has been indexed still stays inside the interactive loop's O(entities)
442    /// contract instead of fanning out across every sidecar in the store.
443    pub fn find_by_type(&self, type_: &str) -> Result<Vec<IndexRecord>, StoreError> {
444        // Read the type's canonical-folder sidecar when it exists (the common,
445        // O(entities) path). Otherwise fall back to the sidecars of the *one
446        // layer* the type belongs to and filter by `type` — complete for records
447        // filed under a non-canonical folder name within that layer (e.g. a
448        // custom `proposal` filed in `records/proposals/` when the canonical
449        // guess is the bare `records/proposal/`), without the whole-store
450        // sidecar fan-out that would break the interactive loop's O(entities)
451        // contract. A type lives in exactly one layer, and `default_type_folder`
452        // always encodes it (recognized → its SPEC layer; unrecognized →
453        // `records/`), so the fallback walk is bounded to that layer's subtree —
454        // O(entities-in-layer), never O(store). Either way: sequential, complete
455        // sidecar reads, never a walk-and-parse of the tree.
456        let canonical_folder = default_type_folder(type_);
457        let canonical = self.root.join(&canonical_folder).join(TYPE_INDEX_FILE);
458        let records = if canonical.is_file() {
459            self.read_type_index(&canonical)?
460        } else {
461            self.read_all_type_indexes_in(layer_of_folder(&canonical_folder))?
462        };
463        Ok(records.into_iter().filter(|r| r.type_ == type_).collect())
464    }
465
466    /// Candidate set for a `key=value` frontmatter query, **store-wide**: read
467    /// every type-folder `index.jsonl` sidecar and filter their records. The
468    /// unscoped pre-write dedup primitive; prefer [`Store::find_by_where_in`]
469    /// with a layer scope to stay O(entities-in-layer) on the interactive loop.
470    pub fn find_by_where(&self, key: &str, value: &str) -> Result<Vec<IndexRecord>, StoreError> {
471        self.find_by_where_in(key, value, None)
472    }
473
474    /// Candidate set for a `key=value` frontmatter query, **scoped to one
475    /// layer** when `layer` is `Some`: the sidecar walk is confined to that
476    /// layer's subtree (`<root>/<layer>/`), so the I/O is O(entities-in-layer),
477    /// not O(store records). `None` keeps the store-wide read.
478    ///
479    /// This is what makes `--in <layer>` an I/O scope, not just a result
480    /// filter: a `--where`-only query (no `--type`) used to read every sidecar
481    /// in the store and narrow by layer in memory, breaking the O(entities)
482    /// contract the interactive loop depends on. With a layer in hand we walk
483    /// only that layer's sidecars.
484    pub fn find_by_where_in(
485        &self,
486        key: &str,
487        value: &str,
488        layer: Option<Layer>,
489    ) -> Result<Vec<IndexRecord>, StoreError> {
490        // A `key=value` query can target any frontmatter field across any type,
491        // so within the chosen subtree we still read every type-folder sidecar
492        // and filter. The layer (when given) bounds *which* subtree, turning a
493        // whole-store walk into a single-layer walk.
494        let records = self.read_all_type_indexes_in(layer)?;
495        Ok(records
496            .into_iter()
497            .filter(|r| record_matches_field(r, key, value))
498            .collect())
499    }
500
501    /// Every record across the type-folder `index.jsonl` sidecars, scoped to one
502    /// layer when `layer` is `Some` (the walk is confined to `<root>/<layer>/`)
503    /// else store-wide. Sequential, complete sidecar reads — never a
504    /// walk-and-parse of the content tree.
505    ///
506    /// This is the unfiltered sidecar-enumeration primitive the relationship
507    /// loop sits on: [`crate::graph::backlinks_filtered`] uses it to bound its
508    /// candidate set to the relevant layer (or the whole store) without opening
509    /// the content tree, then confirms each candidate's edge by parsing the file.
510    pub fn sidecar_records(&self, layer: Option<Layer>) -> Result<Vec<IndexRecord>, StoreError> {
511        self.read_all_type_indexes_in(layer)
512    }
513
514    /// Parse a type-folder's `index.jsonl` into [`IndexRecord`]s, applying
515    /// last-write-wins by `path` over any un-compacted lines. The sidecar-read
516    /// primitive every structured query sits on.
517    pub fn read_type_index(&self, index_jsonl: &Path) -> Result<Vec<IndexRecord>, StoreError> {
518        let text = std::fs::read_to_string(index_jsonl).map_err(|e| StoreError::BadTypeIndex {
519            path: index_jsonl.to_path_buf(),
520            message: e.to_string(),
521        })?;
522
523        // Last-write-wins by `path` over un-compacted lines: a later line for
524        // the same path supersedes an earlier one (the jsonl is append-mostly
525        // and only compacted on rebuild). Blank lines are skipped; a non-blank
526        // line that is not a valid IndexRecord is a hard parse error.
527        let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
528        for (i, line) in text.lines().enumerate() {
529            let trimmed = line.trim();
530            if trimmed.is_empty() {
531                continue;
532            }
533            let record: IndexRecord =
534                serde_json::from_str(trimmed).map_err(|e| StoreError::BadTypeIndex {
535                    path: index_jsonl.to_path_buf(),
536                    message: format!("line {}: {e}", i + 1),
537                })?;
538            by_path.insert(record.path.clone(), record);
539        }
540        // BTreeMap keyed by path → records emerge sorted by path ascending,
541        // a deterministic order independent of line order in the file.
542        Ok(by_path.into_values().collect())
543    }
544
545    /// Resolve a store-relative path to its absolute on-disk path under
546    /// [`root`](Store::root).
547    pub fn abs_path(&self, store_relative: &Path) -> PathBuf {
548        // `Path::join` returns `store_relative` unchanged if it is already
549        // absolute, so passing an absolute path through is a no-op.
550        self.root.join(store_relative)
551    }
552
553    /// Convert an absolute path under the store into its store-relative form.
554    pub fn rel_path(&self, abs: &Path) -> Option<PathBuf> {
555        abs.strip_prefix(&self.root).ok().map(|p| p.to_path_buf())
556    }
557
558    // ── Private helpers ─────────────────────────────────────────────────────
559
560    /// Resolve a caller-supplied folder path (store-relative or absolute) to an
561    /// absolute path under the store root.
562    fn resolve_under_root(&self, folder: &Path) -> PathBuf {
563        if folder.is_absolute() {
564            folder.to_path_buf()
565        } else {
566            self.root.join(folder)
567        }
568    }
569
570    /// Walk a subtree for content `.md` files (skip hidden dirs, skip `index.md`
571    /// / `DB.md` / `log.md`), returning store-relative paths. Used by the layer
572    /// and type-folder walks.
573    fn walk_content_md(&self, root: &Path) -> Result<Vec<PathBuf>, StoreError> {
574        let mut out = Vec::new();
575        for entry in self.md_walker(root).build() {
576            let entry = entry.map_err(|e| StoreError::Search {
577                root: root.to_path_buf(),
578                message: e.to_string(),
579            })?;
580            if !is_file_entry(&entry) {
581                continue;
582            }
583            let path = entry.path();
584            if !has_md_extension(path) {
585                continue;
586            }
587            if is_non_content_basename(path) {
588                continue;
589            }
590            if let Some(rel) = self.rel_path(path) {
591                out.push(rel);
592            }
593        }
594        out.sort();
595        Ok(out)
596    }
597
598    /// Walk the whole store for **every** `.md` file (including `index.md`),
599    /// skipping hidden dirs and the `log/` archive tree. Used by the backlink
600    /// scan, where the literal link text can live in any markdown file.
601    fn walk_all_md(&self) -> Result<Vec<PathBuf>, StoreError> {
602        let mut out = Vec::new();
603        for entry in self.md_walker(&self.root).build() {
604            let entry = entry.map_err(|e| StoreError::Search {
605                root: self.root.clone(),
606                message: e.to_string(),
607            })?;
608            if !is_file_entry(&entry) {
609                continue;
610            }
611            let path = entry.path();
612            if !has_md_extension(path) {
613                continue;
614            }
615            if self.is_in_log_dir(path) {
616                continue;
617            }
618            if let Some(rel) = self.rel_path(path) {
619                out.push(rel);
620            }
621        }
622        out.sort();
623        Ok(out)
624    }
625
626    /// Read and merge every type-folder `index.jsonl` sidecar under `layer`
627    /// when given, else the whole store (skip hidden + `log/`). Each sidecar is
628    /// read with last-write-wins by path; across sidecars, paths are disjoint by
629    /// construction (one sidecar per folder), so a plain concatenation preserves
630    /// completeness. A layer scope confines the walk to `<root>/<layer>/`, which
631    /// is what keeps `find_by_where_in` O(entities-in-layer).
632    fn read_all_type_indexes_in(
633        &self,
634        layer: Option<Layer>,
635    ) -> Result<Vec<IndexRecord>, StoreError> {
636        let mut out = Vec::new();
637        for sidecar in self.find_type_index_files_in(layer)? {
638            out.extend(self.read_type_index(&self.abs_path(&sidecar))?);
639        }
640        Ok(out)
641    }
642
643    /// Locate every `index.jsonl` sidecar under `layer` (when given) else the
644    /// whole store (skip hidden + `log/`), returning store-relative paths. The
645    /// walk root is `<root>/<layer>/` for a scoped read and `self.root` for the
646    /// store-wide read; a non-existent layer subtree yields no sidecars rather
647    /// than walking a missing path.
648    fn find_type_index_files_in(&self, layer: Option<Layer>) -> Result<Vec<PathBuf>, StoreError> {
649        let walk_root = match layer {
650            Some(l) => self.root.join(l.dir_name()),
651            None => self.root.clone(),
652        };
653        // A scoped walk over a layer folder that does not exist yet must be an
654        // empty result, mirroring `walk_layer`'s missing-dir guard — not a walk
655        // error from `ignore` over a nonexistent path.
656        if !walk_root.is_dir() {
657            return Ok(Vec::new());
658        }
659        let mut out = Vec::new();
660        let mut builder = WalkBuilder::new(&walk_root);
661        builder.standard_filters(false).hidden(true);
662        for entry in builder.build() {
663            let entry = entry.map_err(|e| StoreError::Search {
664                root: walk_root.clone(),
665                message: e.to_string(),
666            })?;
667            if !is_file_entry(&entry) {
668                continue;
669            }
670            let path = entry.path();
671            if path.file_name().and_then(|n| n.to_str()) != Some(TYPE_INDEX_FILE) {
672                continue;
673            }
674            if self.is_in_log_dir(path) {
675                continue;
676            }
677            if let Some(rel) = self.rel_path(path) {
678                out.push(rel);
679            }
680        }
681        out.sort();
682        Ok(out)
683    }
684
685    /// A `WalkBuilder` configured for db.md SWEEPs: gitignore/global-ignore are
686    /// OFF (a SWEEP must see every file even if the store is a git repo with a
687    /// `.gitignore`), but hidden files/dirs are skipped.
688    fn md_walker(&self, root: &Path) -> WalkBuilder {
689        let mut builder = WalkBuilder::new(root);
690        builder.standard_filters(false).hidden(true);
691        builder
692    }
693
694    /// True if an absolute path lives under the store's root-level `log/`
695    /// rotation-archive directory.
696    fn is_in_log_dir(&self, abs: &Path) -> bool {
697        match self.rel_path(abs) {
698            Some(rel) => rel.components().next().map(|c| c.as_os_str()) == Some("log".as_ref()),
699            None => false,
700        }
701    }
702
703    /// Read a file's frontmatter `updated` field as an RFC3339 timestamp,
704    /// returning `None` when absent/unparseable. A self-contained reader (does
705    /// not depend on the not-yet-implemented `parser::read_file`); parses the
706    /// leading `---`-fenced YAML block with the same engine the parser uses.
707    fn read_updated(&self, abs: &Path) -> Option<DateTime<FixedOffset>> {
708        let text = std::fs::read_to_string(abs).ok()?;
709        let yaml = frontmatter_block(&text)?;
710        let value: serde_yml::Value = serde_yml::from_str(yaml).ok()?;
711        let raw = value.get("updated")?;
712        value_to_datetime(raw)
713    }
714
715    /// The `<YYYY>/<MM>` shard segment for a sharding type, from its primary
716    /// date field with a `created` fallback. Reads the public `Frontmatter`
717    /// fields directly. `None` when no usable date is present.
718    fn primary_shard_segment(&self, type_: &str, fm: &Frontmatter) -> Option<(String, String)> {
719        // Try the type's primary date field first.
720        if let Some(field) = primary_date_field(type_) {
721            if let Some(v) = fm.extra.get(field) {
722                if let Some(seg) = value_to_year_month(v) {
723                    return Some(seg);
724                }
725            }
726        }
727        // Universal fallback: the typed `created` timestamp.
728        fm.created
729            .map(|dt| (format!("{:04}", dt.year()), format!("{:02}", dt.month())))
730    }
731}
732
733// ── Free helpers (no `self`) ────────────────────────────────────────────────
734
735/// True if a walk entry is a regular file (not a dir / symlink-to-dir).
736fn is_file_entry(entry: &ignore::DirEntry) -> bool {
737    entry.file_type().map(|ft| ft.is_file()).unwrap_or(false)
738}
739
/// Report whether `path` carries a lowercase `.md` extension.
///
/// The comparison is case-sensitive (`.MD` does not count), and a path with
/// no extension or a non-UTF-8 extension is rejected.
fn has_md_extension(path: &Path) -> bool {
    matches!(path.extension().and_then(|ext| ext.to_str()), Some("md"))
}
745
746/// True if the basename is a non-content meta file (`DB.md`, `index.md`,
747/// `log.md`) that the content walks must skip.
748fn is_non_content_basename(path: &Path) -> bool {
749    match path.file_name().and_then(|n| n.to_str()) {
750        Some(name) => NON_CONTENT_BASENAMES.contains(&name),
751        None => false,
752    }
753}
754
/// Return `name` with a `.md` suffix, adding one only when it is missing.
///
/// The check is a case-sensitive suffix test, so `a.MD` becomes `a.MD.md`.
fn ensure_md_extension(name: &str) -> String {
    let suffix = if name.ends_with(".md") { "" } else { ".md" };
    [name, suffix].concat()
}
763
/// Render a store-relative path as a wiki-link target string.
///
/// Only normal path components are kept (so a leading `./`, root or `..`
/// markers are dropped), they are joined with `/` regardless of platform, and
/// a single trailing `.md` is removed. Components that are not valid UTF-8 are
/// skipped.
fn path_to_link_str(target: &Path) -> String {
    use std::path::Component;

    let segments: Vec<&str> = target
        .components()
        .filter_map(|component| match component {
            Component::Normal(name) => name.to_str(),
            _ => None,
        })
        .collect();
    let mut link = segments.join("/");
    // Drop one `.md` suffix in place instead of re-allocating the string.
    let stem_len = link.strip_suffix(".md").map(str::len);
    if let Some(len) = stem_len {
        link.truncate(len);
    }
    link
}
781
/// The canonical default folder for a `type_`, per the SPEC type table
/// (`email → sources/emails`, `expense → records/expenses`, …).
///
/// An unrecognized type falls back to `records/<type>` using the bare type
/// name verbatim — no pluralization guess.
fn default_type_folder(type_: &str) -> PathBuf {
    let canonical = match type_ {
        // sources
        "email" => Some("sources/emails"),
        "transcript" => Some("sources/transcripts"),
        "pdf-source" => Some("sources/docs"),
        // records — entities
        "contact" => Some("records/contacts"),
        "company" => Some("records/companies"),
        // records — events
        "expense" => Some("records/expenses"),
        "meeting" => Some("records/meetings"),
        "decision" => Some("records/decisions"),
        "invoice" => Some("records/invoices"),
        // wiki — the SPEC files a wiki-page under `wiki/<topic>/`, i.e. always
        // in a sub-folder and never flat under `wiki/`. Per the original
        // maintainers' note, a two-component `wiki/<file>` path is
        // non-conforming because the index/validate code expects the
        // `<layer>/<type-folder>/<file>` shape, so a flat wiki page would
        // either break write-through or be dropped from the catalogs. With
        // only the bare type in hand, `wiki/topics` is the deterministic
        // default folder.
        "wiki-page" => Some("wiki/topics"),
        _ => None,
    };
    match canonical {
        Some(folder) => PathBuf::from(folder),
        // NOTE(review): `type_` is joined verbatim; a value containing path
        // separators or `..` would land outside `records/` — presumably
        // validated upstream, confirm.
        None => PathBuf::from("records").join(type_),
    }
}
815
816/// The canonical [`Layer`] a `type_` belongs to, derived from its default
817/// type-folder (`email` → `Sources`, `contact` → `Records`, `wiki-page` →
818/// `Wiki`, unrecognized → `Records`). The write path uses this to decide whether
819/// an agent-supplied folder is in the *right* layer for the type before honouring
820/// its sub-folder choice.
821pub fn layer_for_type(type_: &str) -> Layer {
822    layer_of_folder(&default_type_folder(type_)).unwrap_or(Layer::Records)
823}
824
825/// The [`Layer`] a type-folder path lives in, read from its first component
826/// (`sources/` → `Sources`, `records/` → `Records`, `wiki/` → `Wiki`). Used to
827/// bound [`Store::find_by_type`]'s canonical-folder-absent fallback to a single
828/// layer subtree. Returns `None` for a path with no recognized layer prefix;
829/// every value [`default_type_folder`] produces has one, so in practice this is
830/// always `Some` on the call path — `None` degrades to a store-wide read.
831fn layer_of_folder(folder: &Path) -> Option<Layer> {
832    let first = folder.components().next()?.as_os_str().to_str()?;
833    Layer::from_dir_name(first)
834}
835
/// Infer a content file's canonical `type` from its store-relative path — the
/// inverse of [`default_type_folder`] and the single source of truth for
/// path→type inference.
///
/// The path must have the `<layer>/<type-folder>/<file…>` shape: at least
/// three components, the first being `sources`, `records` or `wiki`. Anything
/// shorter, or under an unknown leading directory, yields `None`.
///
/// Recognized `(layer, folder)` pairs map back to their canonical type. An
/// unrecognized folder maps to the **bare folder name verbatim** (no
/// pluralization/singularization) so it round-trips with
/// `default_type_folder`'s unrecognized fallback (`task` ⇄ `records/task`);
/// singularizing would break that (`records/tasks` → `task`, but
/// `default_type_folder("task")` → `records/task`). Every `wiki/<topic>/…`
/// path infers `wiki-page`, whatever the topic folder is called.
pub fn infer_type_from_path(rel: &Path) -> Option<String> {
    // Only the first three UTF-8 components matter: the layer, the
    // type-folder, and proof that something sits below that folder (so the
    // "folder" is not really a file directly under the layer).
    let head: Vec<&str> = rel
        .components()
        .filter_map(|c| c.as_os_str().to_str())
        .take(3)
        .collect();
    let (layer, folder) = match head.as_slice() {
        [layer, folder, _] => (*layer, *folder),
        _ => return None,
    };

    let inferred = match layer {
        "sources" => match folder {
            "emails" => "email",
            "transcripts" => "transcript",
            "docs" => "pdf-source",
            unrecognized => unrecognized,
        },
        "records" => match folder {
            "contacts" => "contact",
            "companies" => "company",
            "expenses" => "expense",
            "meetings" => "meeting",
            "decisions" => "decision",
            "invoices" => "invoice",
            unrecognized => unrecognized,
        },
        // Every wiki page is filed under a topic folder; the topic name does
        // not influence the type.
        "wiki" => "wiki-page",
        _ => return None,
    };
    Some(inferred.to_owned())
}
882
/// The name of the frontmatter field whose value drives the `<YYYY>/<MM>`
/// shard for `type_`.
///
/// `None` means the type has no canonical date field and sharding relies on
/// the `created` fallback only; that includes custom event types.
fn primary_date_field(type_: &str) -> Option<&'static str> {
    // (type, primary date field) pairs for the built-in sharding types.
    const PRIMARY_DATE_FIELDS: [(&str, &str); 6] = [
        ("email", "date"),
        ("transcript", "recorded_at"),
        ("pdf-source", "received_at"),
        ("expense", "date"),
        ("invoice", "date"),
        ("meeting", "date"),
    ];
    PRIMARY_DATE_FIELDS
        .iter()
        .find(|(known, _)| *known == type_)
        .map(|&(_, field)| field)
}
896
897/// Parse a YAML value into an RFC3339 [`DateTime`], accepting both an explicit
898/// string and a YAML-native scalar rendered to string.
899fn value_to_datetime(value: &serde_yml::Value) -> Option<DateTime<FixedOffset>> {
900    let s = yaml_scalar_string(value)?;
901    DateTime::parse_from_rfc3339(s.trim()).ok()
902}
903
904/// Extract `(YYYY, MM)` from a YAML date/timestamp value. Lenient: matches a
905/// leading `YYYY-MM` so a bare `2026-05-22` date and a full
906/// `2026-05-22T10:00:00-07:00` timestamp both work.
907fn value_to_year_month(value: &serde_yml::Value) -> Option<(String, String)> {
908    let s = yaml_scalar_string(value)?;
909    year_month_from_str(s.trim())
910}
911
/// `(YYYY, MM)` from the leading `YYYY-MM` of a date string.
///
/// Requires four ASCII digits, a `-`, and two ASCII digits forming a month in
/// `01..=12`; whatever follows is ignored. Hand-rolled rather than
/// regex-based to keep a regex compile off the write path.
fn year_month_from_str(s: &str) -> Option<(String, String)> {
    let (month_tens, month_ones) = match s.as_bytes() {
        [y0, y1, y2, y3, b'-', m0, m1, ..]
            if [y0, y1, y2, y3, m0, m1].iter().all(|b| b.is_ascii_digit()) =>
        {
            (m0 - b'0', m1 - b'0')
        }
        _ => return None,
    };
    let month = month_tens * 10 + month_ones;
    if month == 0 || month > 12 {
        return None;
    }
    // The first seven bytes are verified ASCII, so these byte offsets are
    // guaranteed to fall on character boundaries.
    Some((s[..4].to_owned(), s[5..7].to_owned()))
}
937
938/// Render a YAML scalar as a string: a real `String` verbatim, otherwise the
939/// value's compact YAML serialization (covers timestamps that the YAML engine
940/// may surface as a non-string scalar).
941fn yaml_scalar_string(value: &serde_yml::Value) -> Option<String> {
942    if let Some(s) = value.as_str() {
943        return Some(s.to_string());
944    }
945    match value {
946        serde_yml::Value::Null => None,
947        serde_yml::Value::Mapping(_) | serde_yml::Value::Sequence(_) => None,
948        other => serde_yml::to_string(other)
949            .ok()
950            .map(|s| s.trim().to_string()),
951    }
952}
953
/// The YAML frontmatter block of a file: the text strictly between a leading
/// `---` fence and the next `---` fence.
///
/// A UTF-8 BOM before the opening fence and CRLF line endings are tolerated,
/// but the opening fence must be the very first line. The returned slice
/// keeps the line terminators of the block's own lines and is empty when the
/// two fences are adjacent. `None` when the file does not open with a fence
/// or the closing fence is never found.
fn frontmatter_block(text: &str) -> Option<&str> {
    // A fence is a line that is exactly `---`, ignoring trailing CRs.
    let is_fence = |line: &str| line.trim_end_matches('\r') == "---";

    let body = text.strip_prefix('\u{feff}').unwrap_or(text);
    let (opening, block) = split_first_line(body);
    if !is_fence(opening) {
        return None;
    }

    // `consumed` is the byte length of the block lines scanned so far; it
    // always sits just past a `\n`, so slicing at it is boundary-safe.
    let mut consumed = 0usize;
    loop {
        let (line, tail) = split_first_line(&block[consumed..]);
        if is_fence(line) {
            return Some(&block[..consumed]);
        }
        if tail.is_empty() {
            // Ran out of input without meeting a closing fence.
            return None;
        }
        consumed += line.len() + 1; // +1 for the `\n` that ended `line`
    }
}

/// Split `s` at its first `\n` into (line without the `\n`, remainder after
/// it). Without any newline the whole input is the line and the remainder is
/// empty.
fn split_first_line(s: &str) -> (&str, &str) {
    s.split_once('\n').unwrap_or((s, ""))
}
995
996/// True if an [`IndexRecord`] has a field `key` equal to `value`, checking the
997/// typed columns first and then the flattened `fields` map.
998fn record_matches_field(record: &IndexRecord, key: &str, value: &str) -> bool {
999    match key {
1000        "type" => record.type_ == value,
1001        "summary" => record.summary == value,
1002        "path" => record.path.to_string_lossy() == value,
1003        "created" => timestamp_matches(record.created, value),
1004        "updated" => timestamp_matches(record.updated, value),
1005        "tags" => record.tags.iter().any(|t| t == value),
1006        "links" => record.links.iter().any(|l| l == value),
1007        other => record
1008            .fields
1009            .get(other)
1010            .map(|v| json_value_matches(v, value))
1011            .unwrap_or(false),
1012    }
1013}
1014
1015/// Compare a record's `created`/`updated` instant against a query `value`.
1016///
1017/// db.md files write timestamps in several equivalent RFC3339 spellings — most
1018/// commonly the `Z` UTC designator (`2026-05-01T00:00:00Z`) but also an explicit
1019/// offset (`...+00:00`, `...-07:00`). A naive `record.created.to_rfc3339() ==
1020/// value` reformats only one side: chrono renders a UTC instant as `+00:00`, so
1021/// the `Z` form an agent reads straight out of the file would never match. We
1022/// instead parse `value` as RFC3339 and compare instants, where `Z` and `+00:00`
1023/// (and any same-instant offset) are equal. A `value` that is not valid RFC3339
1024/// can never equal a real timestamp, so it falls through to `false`.
1025fn timestamp_matches(stored: Option<DateTime<FixedOffset>>, value: &str) -> bool {
1026    match (stored, DateTime::parse_from_rfc3339(value)) {
1027        (Some(stored), Ok(queried)) => stored == queried,
1028        _ => false,
1029    }
1030}
1031
1032/// Compare a JSON field value against a query string. A string matches
1033/// verbatim; scalars match their textual form; an array matches if any element
1034/// matches (so a list-valued frontmatter field is membership-queried).
1035fn json_value_matches(v: &serde_json::Value, value: &str) -> bool {
1036    match v {
1037        serde_json::Value::String(s) => s == value,
1038        serde_json::Value::Bool(b) => b.to_string() == value,
1039        serde_json::Value::Number(n) => n.to_string() == value,
1040        serde_json::Value::Array(items) => items.iter().any(|i| json_value_matches(i, value)),
1041        serde_json::Value::Null => value.is_empty(),
1042        serde_json::Value::Object(_) => false,
1043    }
1044}
1045
1046#[cfg(test)]
1047mod tests {
1048    use super::*;
1049    use std::fs;
1050    use tempfile::{tempdir, TempDir};
1051
1052    // ── Fixtures ────────────────────────────────────────────────────────────
1053
1054    /// Write `contents` to `<root>/<rel>`, creating parent dirs. Returns the
1055    /// store-relative path for convenient assertions.
1056    fn write(root: &Path, rel: &str, contents: &str) -> PathBuf {
1057        let abs = root.join(rel);
1058        fs::create_dir_all(abs.parent().unwrap()).unwrap();
1059        fs::write(&abs, contents).unwrap();
1060        PathBuf::from(rel)
1061    }
1062
1063    /// A minimal content file with the given `updated` timestamp in frontmatter.
1064    fn content_md(updated: &str) -> String {
1065        format!(
1066            "---\ntype: note\ncreated: {updated}\nupdated: {updated}\nsummary: a note\n---\n\nbody\n"
1067        )
1068    }
1069
1070    /// A bare directory with a `DB.md` marker (valid `db-md` frontmatter so the
1071    /// real parser is exercised).
1072    fn empty_store() -> TempDir {
1073        let dir = tempdir().unwrap();
1074        fs::write(
1075            dir.path().join("DB.md"),
1076            "---\ntype: db-md\nscope: company\nowner: Test\n---\n\n# Store\n",
1077        )
1078        .unwrap();
1079        dir
1080    }
1081
1082    /// Open a store rooted at a TempDir; panics if `open` rejects it.
1083    fn open(dir: &TempDir) -> Store {
1084        Store::open(dir.path()).expect("fixture should be a valid store")
1085    }
1086
1087    fn rels(paths: &[PathBuf]) -> Vec<String> {
1088        paths
1089            .iter()
1090            .map(|p| p.to_string_lossy().replace('\\', "/"))
1091            .collect()
1092    }
1093
1094    // ── Layer ───────────────────────────────────────────────────────────────
1095
1096    #[test]
1097    fn layer_dir_name_and_parse_are_inverse() {
1098        for layer in Layer::all() {
1099            assert_eq!(Layer::from_dir_name(layer.dir_name()), Some(layer));
1100        }
1101        assert_eq!(Layer::Sources.dir_name(), "sources");
1102        assert_eq!(Layer::Records.dir_name(), "records");
1103        assert_eq!(Layer::Wiki.dir_name(), "wiki");
1104        assert_eq!(Layer::from_dir_name("log"), None);
1105        assert_eq!(Layer::from_dir_name("Sources"), None); // case-sensitive
1106    }
1107
1108    #[test]
1109    fn layer_order_is_canonical() {
1110        // stats keys a BTreeMap on Layer; the sort order must be sources<records<wiki.
1111        let mut v = [Layer::Wiki, Layer::Sources, Layer::Records];
1112        v.sort();
1113        assert_eq!(v, [Layer::Sources, Layer::Records, Layer::Wiki]);
1114    }
1115
1116    // ── is_db_md_store / open ────────────────────────────────────────────────
1117
1118    #[test]
1119    fn is_store_true_only_with_uppercase_marker() {
1120        let dir = tempdir().unwrap();
1121        assert!(
1122            !Store::is_db_md_store(dir.path()),
1123            "no marker → not a store"
1124        );
1125
1126        fs::write(dir.path().join("DB.md"), "---\ntype: db-md\n---\n").unwrap();
1127        assert!(Store::is_db_md_store(dir.path()), "uppercase DB.md → store");
1128    }
1129
1130    #[test]
1131    fn is_store_false_for_lowercase_db_md() {
1132        // The case-sensitivity contract: a lowercase db.md is the spec name, not
1133        // a marker — even on a case-insensitive filesystem where Path::exists
1134        // would lie. This test must pass on macOS (case-insensitive) too.
1135        let dir = tempdir().unwrap();
1136        fs::write(dir.path().join("db.md"), "---\ntype: db-md\n---\n").unwrap();
1137        assert!(
1138            !Store::is_db_md_store(dir.path()),
1139            "lowercase db.md must NOT be treated as a store marker"
1140        );
1141        assert!(Store::open(dir.path()).is_err());
1142    }
1143
1144    #[test]
1145    fn is_store_false_when_db_md_is_a_directory() {
1146        let dir = tempdir().unwrap();
1147        fs::create_dir(dir.path().join("DB.md")).unwrap();
1148        assert!(
1149            !Store::is_db_md_store(dir.path()),
1150            "a directory named DB.md is not the file marker"
1151        );
1152    }
1153
1154    #[test]
1155    fn open_rejects_non_store_with_path() {
1156        let dir = tempdir().unwrap();
1157        let err = Store::open(dir.path()).unwrap_err();
1158        assert_eq!(err.path, dir.path());
1159    }
1160
1161    #[test]
1162    fn open_succeeds_and_parses_config() {
1163        let dir = tempdir().unwrap();
1164        // A DB.md whose ## Policies declares a frozen page — proves open()
1165        // actually parsed the config rather than substituting a default.
1166        fs::write(
1167            dir.path().join("DB.md"),
1168            "---\ntype: db-md\nscope: company\nowner: Test\n---\n\n# Store\n\n\
1169             ## Policies\n\n### Frozen pages\n- records/decisions/q1.md\n",
1170        )
1171        .unwrap();
1172        let store = Store::open(dir.path()).unwrap();
1173        assert_eq!(store.root, dir.path());
1174        assert!(
1175            store
1176                .config
1177                .frozen_pages
1178                .iter()
1179                .any(|p| p == Path::new("records/decisions/q1.md")),
1180            "open() must surface DB.md ## Policies, got {:?}",
1181            store.config.frozen_pages
1182        );
1183    }
1184
1185    // ── walk / walk_layer / walk_type_folder ─────────────────────────────────
1186
1187    #[test]
1188    fn walk_collects_content_across_layers_skipping_meta_and_log() {
1189        let dir = empty_store();
1190        let root = dir.path();
1191        write(
1192            root,
1193            "sources/emails/2026/05/a.md",
1194            &content_md("2026-05-01T00:00:00Z"),
1195        );
1196        write(
1197            root,
1198            "records/contacts/sarah.md",
1199            &content_md("2026-05-02T00:00:00Z"),
1200        );
1201        write(
1202            root,
1203            "wiki/people/sarah.md",
1204            &content_md("2026-05-03T00:00:00Z"),
1205        );
1206        // Things walk() must SKIP:
1207        write(root, "sources/emails/index.md", "---\ntype: index\n---\n"); // catalog
1208        write(root, "index.md", "---\ntype: index\n---\n"); // root catalog
1209        write(root, "log.md", "---\ntype: log\n---\n"); // log
1210        write(root, "log/2026-04.md", "---\ntype: log\n---\n"); // rotated log archive
1211        write(
1212            root,
1213            "sources/.hidden/secret.md",
1214            &content_md("2026-05-09T00:00:00Z"),
1215        ); // hidden dir
1216        write(root, "records/contacts/notes.txt", "not markdown"); // non-md
1217
1218        let store = open(&dir);
1219        let got = rels(&store.walk().unwrap());
1220        assert_eq!(
1221            got,
1222            vec![
1223                "records/contacts/sarah.md".to_string(),
1224                "sources/emails/2026/05/a.md".to_string(),
1225                "wiki/people/sarah.md".to_string(),
1226            ]
1227        );
1228    }
1229
1230    #[test]
1231    fn walk_layer_is_scoped() {
1232        let dir = empty_store();
1233        let root = dir.path();
1234        write(
1235            root,
1236            "sources/emails/2026/05/a.md",
1237            &content_md("2026-05-01T00:00:00Z"),
1238        );
1239        write(
1240            root,
1241            "records/contacts/sarah.md",
1242            &content_md("2026-05-02T00:00:00Z"),
1243        );
1244        let store = open(&dir);
1245
1246        assert_eq!(
1247            rels(&store.walk_layer(Layer::Sources).unwrap()),
1248            vec!["sources/emails/2026/05/a.md".to_string()]
1249        );
1250        assert_eq!(
1251            rels(&store.walk_layer(Layer::Records).unwrap()),
1252            vec!["records/contacts/sarah.md".to_string()]
1253        );
1254        // A layer with no directory is empty, not an error.
1255        assert!(store.walk_layer(Layer::Wiki).unwrap().is_empty());
1256    }
1257
1258    #[test]
1259    fn walk_type_folder_recurses_shards_and_accepts_abs_or_rel() {
1260        let dir = empty_store();
1261        let root = dir.path();
1262        write(
1263            root,
1264            "sources/emails/2026/05/a.md",
1265            &content_md("2026-05-01T00:00:00Z"),
1266        );
1267        write(
1268            root,
1269            "sources/emails/2026/06/b.md",
1270            &content_md("2026-06-01T00:00:00Z"),
1271        );
1272        write(root, "sources/emails/index.md", "---\ntype: index\n---\n"); // skipped
1273                                                                           // A different type folder must not leak in.
1274        write(
1275            root,
1276            "sources/docs/2026/05/c.md",
1277            &content_md("2026-05-04T00:00:00Z"),
1278        );
1279        let store = open(&dir);
1280
1281        let expected = vec![
1282            "sources/emails/2026/05/a.md".to_string(),
1283            "sources/emails/2026/06/b.md".to_string(),
1284        ];
1285        // Relative folder arg.
1286        assert_eq!(
1287            rels(&store.walk_type_folder(Path::new("sources/emails")).unwrap()),
1288            expected
1289        );
1290        // Absolute folder arg under the store resolves identically.
1291        assert_eq!(
1292            rels(
1293                &store
1294                    .walk_type_folder(&root.join("sources/emails"))
1295                    .unwrap()
1296            ),
1297            expected
1298        );
1299    }
1300
1301    // ── recent_in_type_folder ────────────────────────────────────────────────
1302
1303    #[test]
1304    fn recent_orders_by_updated_desc_then_path_and_caps() {
1305        let dir = empty_store();
1306        let root = dir.path();
1307        // newest
1308        write(
1309            root,
1310            "records/meetings/2026/05/c.md",
1311            &content_md("2026-05-03T00:00:00Z"),
1312        );
1313        // tie on updated — path asc decides (a before b)
1314        write(
1315            root,
1316            "records/meetings/2026/05/a.md",
1317            &content_md("2026-05-02T00:00:00Z"),
1318        );
1319        write(
1320            root,
1321            "records/meetings/2026/05/b.md",
1322            &content_md("2026-05-02T00:00:00Z"),
1323        );
1324        // oldest
1325        write(
1326            root,
1327            "records/meetings/2026/04/z.md",
1328            &content_md("2026-04-01T00:00:00Z"),
1329        );
1330        let store = open(&dir);
1331
1332        let all = rels(
1333            &store
1334                .recent_in_type_folder(Path::new("records/meetings"), 10)
1335                .unwrap(),
1336        );
1337        assert_eq!(
1338            all,
1339            vec![
1340                "records/meetings/2026/05/c.md".to_string(), // newest
1341                "records/meetings/2026/05/a.md".to_string(), // tie, path asc
1342                "records/meetings/2026/05/b.md".to_string(),
1343                "records/meetings/2026/04/z.md".to_string(), // oldest
1344            ]
1345        );
1346
1347        // Cap takes the n most-recent.
1348        let top2 = rels(
1349            &store
1350                .recent_in_type_folder(Path::new("records/meetings"), 2)
1351                .unwrap(),
1352        );
1353        assert_eq!(
1354            top2,
1355            vec![
1356                "records/meetings/2026/05/c.md".to_string(),
1357                "records/meetings/2026/05/a.md".to_string(),
1358            ]
1359        );
1360    }
1361
1362    #[test]
1363    fn recent_sorts_undated_files_last() {
1364        let dir = empty_store();
1365        let root = dir.path();
1366        write(
1367            root,
1368            "records/contacts/dated.md",
1369            &content_md("2026-05-01T00:00:00Z"),
1370        );
1371        // No `updated` field at all.
1372        write(
1373            root,
1374            "records/contacts/undated.md",
1375            "---\ntype: contact\nsummary: x\n---\nbody\n",
1376        );
1377        let store = open(&dir);
1378        let got = rels(
1379            &store
1380                .recent_in_type_folder(Path::new("records/contacts"), 10)
1381                .unwrap(),
1382        );
1383        assert_eq!(
1384            got,
1385            vec![
1386                "records/contacts/dated.md".to_string(),
1387                "records/contacts/undated.md".to_string(),
1388            ],
1389            "a file with a real `updated` must outrank one with none"
1390        );
1391    }
1392
1393    // ── type_shards ──────────────────────────────────────────────────────────
1394
1395    #[test]
1396    fn type_shards_classification() {
1397        let dir = empty_store();
1398        let store = open(&dir);
1399        for t in [
1400            "email",
1401            "transcript",
1402            "pdf-source",
1403            "expense",
1404            "invoice",
1405            "meeting",
1406            "order",
1407            "ticket",
1408            "transaction",
1409        ] {
1410            assert!(store.type_shards(t), "{t} should shard");
1411        }
1412        for t in [
1413            "contact",
1414            "company",
1415            "decision",
1416            "wiki-page",
1417            "index",
1418            "log",
1419            "db-md",
1420            "proposal",
1421        ] {
1422            assert!(!store.type_shards(t), "{t} should stay flat");
1423        }
1424    }
1425
1426    // ── shard_path_for ───────────────────────────────────────────────────────
1427
1428    fn fm_with_extra(key: &str, value: &str) -> Frontmatter {
1429        let mut fm = Frontmatter::default();
1430        fm.extra
1431            .insert(key.to_string(), serde_yml::Value::String(value.to_string()));
1432        fm
1433    }
1434
1435    fn fm_with_created(rfc3339: &str) -> Frontmatter {
1436        Frontmatter {
1437            created: Some(DateTime::parse_from_rfc3339(rfc3339).unwrap()),
1438            ..Default::default()
1439        }
1440    }
1441
1442    #[test]
1443    fn shard_path_uses_primary_date_field_per_type() {
1444        let dir = empty_store();
1445        let store = open(&dir);
1446
1447        // expense.date → records/expenses/<YYYY>/<MM>/
1448        let p = store
1449            .shard_path_for("expense", &fm_with_extra("date", "2026-05-22"), "lunch")
1450            .unwrap();
1451        assert_eq!(p, PathBuf::from("records/expenses/2026/05/lunch.md"));
1452
1453        // email.date → sources/emails/<YYYY>/<MM>/
1454        let p = store
1455            .shard_path_for(
1456                "email",
1457                &fm_with_extra("date", "2026-11-02T09:00:00-07:00"),
1458                "e1",
1459            )
1460            .unwrap();
1461        assert_eq!(p, PathBuf::from("sources/emails/2026/11/e1.md"));
1462
1463        // transcript.recorded_at → sources/transcripts/<YYYY>/<MM>/
1464        let p = store
1465            .shard_path_for(
1466                "transcript",
1467                &fm_with_extra("recorded_at", "2025-01-15T12:00:00Z"),
1468                "t1",
1469            )
1470            .unwrap();
1471        assert_eq!(p, PathBuf::from("sources/transcripts/2025/01/t1.md"));
1472    }
1473
1474    #[test]
1475    fn shard_path_falls_back_to_created() {
1476        let dir = empty_store();
1477        let store = open(&dir);
1478        // meeting with no `date` field but a `created` timestamp.
1479        let p = store
1480            .shard_path_for(
1481                "meeting",
1482                &fm_with_created("2024-07-09T08:30:00-04:00"),
1483                "sync",
1484            )
1485            .unwrap();
1486        assert_eq!(p, PathBuf::from("records/meetings/2024/07/sync.md"));
1487    }
1488
1489    #[test]
1490    fn shard_path_primary_field_wins_over_created() {
1491        let dir = empty_store();
1492        let store = open(&dir);
1493        let mut fm = fm_with_created("2020-01-01T00:00:00Z");
1494        fm.extra
1495            .insert("date".into(), serde_yml::Value::String("2026-05-22".into()));
1496        let p = store.shard_path_for("expense", &fm, "x").unwrap();
1497        // The primary `date` (2026/05), not `created` (2020/01), drives the shard.
1498        assert_eq!(p, PathBuf::from("records/expenses/2026/05/x.md"));
1499    }
1500
1501    #[test]
1502    fn shard_path_flat_types_have_no_shard_segment() {
1503        let dir = empty_store();
1504        let store = open(&dir);
1505        // A contact has a `created` date, but contacts stay flat.
1506        let p = store
1507            .shard_path_for(
1508                "contact",
1509                &fm_with_created("2026-05-22T00:00:00Z"),
1510                "sarah-chen",
1511            )
1512            .unwrap();
1513        assert_eq!(p, PathBuf::from("records/contacts/sarah-chen.md"));
1514
1515        // wiki-page is flat (no date shard) but still files under a type-folder:
1516        // `wiki/topics/<name>.md`, NEVER flat as `wiki/<name>.md`. A 2-component
1517        // path is invisible to the index/validate type-folder model.
1518        let p = store
1519            .shard_path_for("wiki-page", &Frontmatter::default(), "renewal-theme")
1520            .unwrap();
1521        assert_eq!(p, PathBuf::from("wiki/topics/renewal-theme.md"));
1522    }
1523
1524    /// Regression: a wiki-page written through the toolkit's own path
1525    /// computation must land at a path the index + validate type-folder model
1526    /// accepts. `shard_path_for("wiki-page", …)` previously returned a
1527    /// 2-component `wiki/<file>` path, which `type_folder_of` (in both `index`
1528    /// and `validate`) treats as "no type-folder" — so the page either crashed
1529    /// `Index::on_write` (it tried to create `index.md` inside a file) or was
1530    /// silently dropped from every catalog by `Index::rebuild_all`. The
1531    /// computed path must have 3 components: `<layer>/<type-folder>/<file>`.
1532    #[test]
1533    fn shard_path_wiki_page_is_indexable_three_component_path() {
1534        let dir = empty_store();
1535        let store = open(&dir);
1536        let p = store
1537            .shard_path_for("wiki-page", &Frontmatter::default(), "renewal-theme")
1538            .unwrap();
1539        // First two components are a layer + a non-empty type-folder segment;
1540        // the file is the third. This is exactly the shape `type_folder_of`
1541        // (`comps.len() >= 3`, `comps[0]` a known layer) requires.
1542        let comps: Vec<&str> = p.iter().filter_map(|c| c.to_str()).collect();
1543        assert_eq!(
1544            comps.len(),
1545            3,
1546            "wiki-page path must be <layer>/<type-folder>/<file>, got {p:?}"
1547        );
1548        assert_eq!(comps[0], "wiki", "first component must be the wiki layer");
1549        assert!(
1550            !comps[1].is_empty() && comps[1] != "renewal-theme.md",
1551            "second component must be a real type-folder, not the file: {p:?}"
1552        );
1553        assert!(
1554            comps[2].ends_with(".md"),
1555            "third component must be the .md file: {p:?}"
1556        );
1557    }
1558
1559    #[test]
1560    fn shard_path_preserves_and_adds_md_extension() {
1561        let dir = empty_store();
1562        let store = open(&dir);
1563        let with = store
1564            .shard_path_for("contact", &Frontmatter::default(), "sarah.md")
1565            .unwrap();
1566        let without = store
1567            .shard_path_for("contact", &Frontmatter::default(), "sarah")
1568            .unwrap();
1569        assert_eq!(with, PathBuf::from("records/contacts/sarah.md"));
1570        assert_eq!(without, PathBuf::from("records/contacts/sarah.md"));
1571    }
1572
1573    #[test]
1574    fn shard_path_errors_when_sharding_type_has_no_date() {
1575        let dir = empty_store();
1576        let store = open(&dir);
1577        // expense shards, but no `date` and no `created` → NoShardDate.
1578        let err = store
1579            .shard_path_for("expense", &Frontmatter::default(), "mystery")
1580            .unwrap_err();
1581        match err {
1582            StoreError::NoShardDate { file } => {
1583                assert_eq!(file, PathBuf::from("records/expenses/mystery.md"));
1584            }
1585            other => panic!("expected NoShardDate, got {other:?}"),
1586        }
1587    }
1588
1589    // ── find_links_to ────────────────────────────────────────────────────────
1590
1591    #[test]
1592    fn find_links_to_matches_all_accepted_spellings() {
1593        let dir = empty_store();
1594        let root = dir.path();
1595        let target = "records/contacts/sarah-chen";
1596
1597        // Plain link.
1598        write(
1599            root,
1600            "wiki/people/sarah.md",
1601            &format!("---\ntype: wiki-page\nsummary: s\n---\nSee [[{target}]].\n"),
1602        );
1603        // Link with display text.
1604        write(
1605            root,
1606            "records/meetings/2026/05/m.md",
1607            &format!("---\ntype: meeting\nsummary: s\n---\nWith [[{target}|Sarah]].\n"),
1608        );
1609        // Link with .md extension (accepted, warned by validate).
1610        write(
1611            root,
1612            "wiki/themes/t.md",
1613            &format!("---\ntype: wiki-page\nsummary: s\n---\n[[{target}.md]]\n"),
1614        );
1615        // A catalog/index file also contains the link literally — included.
1616        write(
1617            root,
1618            "records/contacts/index.md",
1619            &format!("---\ntype: index\n---\n- [[{target}]] — Sarah\n"),
1620        );
1621        // No link to the target.
1622        write(
1623            root,
1624            "wiki/people/elena.md",
1625            "---\ntype: wiki-page\nsummary: s\n---\nNo links here.\n",
1626        );
1627        // Short-form link must NOT match the full-path target.
1628        write(
1629            root,
1630            "wiki/people/bob.md",
1631            "---\ntype: wiki-page\nsummary: s\n---\n[[sarah-chen]]\n",
1632        );
1633        // A longer path that merely starts with the target must NOT match
1634        // (boundary correctness): target `sarah-chen` vs `sarah-chen-jr`.
1635        write(
1636            root,
1637            "wiki/people/jr.md",
1638            &format!("---\ntype: wiki-page\nsummary: s\n---\n[[{target}-jr]]\n"),
1639        );
1640
1641        let store = open(&dir);
1642        let got = rels(&store.find_links_to(Path::new(target)).unwrap());
1643        assert_eq!(
1644            got,
1645            vec![
1646                "records/contacts/index.md".to_string(),
1647                "records/meetings/2026/05/m.md".to_string(),
1648                "wiki/people/sarah.md".to_string(),
1649                "wiki/themes/t.md".to_string(),
1650            ]
1651        );
1652    }
1653
1654    #[test]
1655    fn find_links_to_distinguishes_sibling_paths() {
1656        // Two contacts whose paths share a prefix; a link to one must not be
1657        // reported as a link to the other.
1658        let dir = empty_store();
1659        let root = dir.path();
1660        write(
1661            root,
1662            "wiki/a.md",
1663            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah]]\n",
1664        );
1665        write(
1666            root,
1667            "wiki/b.md",
1668            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
1669        );
1670        let store = open(&dir);
1671
1672        assert_eq!(
1673            rels(
1674                &store
1675                    .find_links_to(Path::new("records/contacts/sarah"))
1676                    .unwrap()
1677            ),
1678            vec!["wiki/a.md".to_string()]
1679        );
1680        assert_eq!(
1681            rels(
1682                &store
1683                    .find_links_to(Path::new("records/contacts/sarah-chen"))
1684                    .unwrap()
1685            ),
1686            vec!["wiki/b.md".to_string()]
1687        );
1688    }
1689
1690    // ── find_links_to_any (batch — the O(changed × store) fix) ─────────────────
1691
1692    /// The working-set validate's incoming-linker discovery runs through
1693    /// `find_links_to_any` over the WHOLE changed set in one pass. This pins the
1694    /// batch contract that makes that single-pass behavior correct: the result is
1695    /// the union of incoming linkers across every target, with per-target
1696    /// boundary correctness preserved (no alternation arm bleeds into a
1697    /// prefix-sharing sibling). If a regression reverts the batch finder to a
1698    /// per-object loop, the union below would still hold — but the boundary +
1699    /// union-equivalence assertions are what guard the *correctness* of folding N
1700    /// scans into one regex.
1701    #[test]
1702    fn find_links_to_any_returns_the_union_with_boundary_correctness() {
1703        let dir = empty_store();
1704        let root = dir.path();
1705
1706        // Two distinct targets, each with its own linker.
1707        write(
1708            root,
1709            "wiki/links-sarah.md",
1710            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
1711        );
1712        write(
1713            root,
1714            "wiki/links-acme.md",
1715            "---\ntype: wiki-page\nsummary: s\n---\nDeal with [[records/companies/acme|Acme]].\n",
1716        );
1717        // One file links to BOTH targets — must appear exactly once (deduped),
1718        // proving the per-file early-exit folds multiple-target hits into a
1719        // single result row rather than one row per matched target.
1720        write(
1721            root,
1722            "records/meetings/2026/05/m.md",
1723            "---\ntype: meeting\nsummary: s\n---\n[[records/contacts/sarah-chen]] re \
1724             [[records/companies/acme]]\n",
1725        );
1726        // A prefix-sharing sibling of a target: a link to `sarah-chen-jr` must NOT
1727        // be reported as a link to `sarah-chen` even though the alternation now
1728        // carries `sarah-chen` as one arm.
1729        write(
1730            root,
1731            "wiki/links-jr.md",
1732            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen-jr]]\n",
1733        );
1734        // A file that links to neither requested target.
1735        write(
1736            root,
1737            "wiki/unrelated.md",
1738            "---\ntype: wiki-page\nsummary: s\n---\n[[wiki/themes/spend]]\n",
1739        );
1740
1741        let store = open(&dir);
1742        let targets = vec![
1743            PathBuf::from("records/contacts/sarah-chen"),
1744            PathBuf::from("records/companies/acme"),
1745        ];
1746
1747        let got = rels(&store.find_links_to_any(&targets).unwrap());
1748        assert_eq!(
1749            got,
1750            vec![
1751                "records/meetings/2026/05/m.md".to_string(),
1752                "wiki/links-acme.md".to_string(),
1753                "wiki/links-sarah.md".to_string(),
1754            ],
1755            "batch finder must return the deduped union of linkers across all \
1756             targets, excluding the prefix-sibling and the unrelated file"
1757        );
1758
1759        // Equivalence: the batch result must equal the union of the per-target
1760        // single finder. This is the property the working-set path relies on
1761        // when it folds one-scan-per-object into one scan for the whole set.
1762        let mut union: std::collections::BTreeSet<PathBuf> = std::collections::BTreeSet::new();
1763        for t in &targets {
1764            for linker in store.find_links_to(t).unwrap() {
1765                union.insert(linker);
1766            }
1767        }
1768        assert_eq!(
1769            rels(&union.into_iter().collect::<Vec<_>>()),
1770            got,
1771            "find_links_to_any must equal the union of per-target find_links_to"
1772        );
1773    }
1774
1775    /// An empty target set must scan nothing and find nothing — and crucially
1776    /// must NOT compile to a match-everything empty regex (which would report
1777    /// every `.md` as a linker). This is the empty-working-set fast path the
1778    /// `validate` loop hits when nothing changed.
1779    #[test]
1780    fn find_links_to_any_empty_targets_matches_nothing() {
1781        let dir = empty_store();
1782        let root = dir.path();
1783        write(
1784            root,
1785            "wiki/a.md",
1786            "---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
1787        );
1788        let store = open(&dir);
1789
1790        assert!(
1791            store.find_links_to_any(&[]).unwrap().is_empty(),
1792            "no targets ⇒ no linkers (an empty pattern must not match every file)"
1793        );
1794        // A set of only empty/non-link targets is likewise a no-op, not a
1795        // match-everything.
1796        assert!(
1797            store
1798                .find_links_to_any(&[PathBuf::from(""), PathBuf::from("./")])
1799                .unwrap()
1800                .is_empty(),
1801            "targets that render to empty link text contribute no alternation arm"
1802        );
1803    }
1804
1805    // ── read_type_index ──────────────────────────────────────────────────────
1806
1807    #[test]
1808    fn read_type_index_parses_records_and_flattens_fields() {
1809        let dir = empty_store();
1810        let root = dir.path();
1811        let jsonl = "\
1812{\"path\":\"records/expenses/2026/05/a.md\",\"type\":\"expense\",\"summary\":\"lunch\",\"tags\":[\"meals\"],\"links\":[\"records/companies/acme\"],\"created\":\"2026-05-01T00:00:00Z\",\"updated\":\"2026-05-01T00:00:00Z\",\"vendor\":\"acme\",\"amount\":42}
1813{\"path\":\"records/expenses/2026/05/b.md\",\"type\":\"expense\",\"summary\":\"taxi\",\"created\":null,\"updated\":null,\"vendor\":\"yellow\"}
1814";
1815        let p = write(root, "records/expenses/index.jsonl", jsonl);
1816        let store = open(&dir);
1817        let recs = store.read_type_index(&store.abs_path(&p)).unwrap();
1818
1819        assert_eq!(recs.len(), 2);
1820        // Sorted by path asc.
1821        assert_eq!(recs[0].path, PathBuf::from("records/expenses/2026/05/a.md"));
1822        assert_eq!(recs[0].type_, "expense");
1823        assert_eq!(recs[0].summary, "lunch");
1824        assert_eq!(recs[0].tags, vec!["meals".to_string()]);
1825        assert_eq!(recs[0].links, vec!["records/companies/acme".to_string()]);
1826        assert!(recs[0].created.is_some());
1827        // Extra (non-typed) frontmatter flattens into `fields`.
1828        assert_eq!(
1829            recs[0].fields.get("vendor"),
1830            Some(&serde_json::json!("acme"))
1831        );
1832        assert_eq!(recs[0].fields.get("amount"), Some(&serde_json::json!(42)));
1833        // Defaults: missing tags/links → empty.
1834        assert!(recs[1].tags.is_empty());
1835        assert!(recs[1].links.is_empty());
1836    }
1837
1838    #[test]
1839    fn read_type_index_last_write_wins_and_skips_blanks() {
1840        let dir = empty_store();
1841        let root = dir.path();
1842        // Same path twice; the second line supersedes the first. A blank line
1843        // in between must be ignored, not error.
1844        let jsonl = "\
1845{\"path\":\"records/contacts/sarah.md\",\"type\":\"contact\",\"summary\":\"old\",\"created\":null,\"updated\":null}
1846
1847{\"path\":\"records/contacts/sarah.md\",\"type\":\"contact\",\"summary\":\"new\",\"created\":null,\"updated\":null}
1848";
1849        let p = write(root, "records/contacts/index.jsonl", jsonl);
1850        let store = open(&dir);
1851        let recs = store.read_type_index(&store.abs_path(&p)).unwrap();
1852        assert_eq!(recs.len(), 1, "duplicate path collapses to one record");
1853        assert_eq!(recs[0].summary, "new", "later line must win");
1854    }
1855
1856    #[test]
1857    fn read_type_index_errors_on_malformed_line() {
1858        let dir = empty_store();
1859        let root = dir.path();
1860        let p = write(root, "records/contacts/index.jsonl", "{not valid json}\n");
1861        let store = open(&dir);
1862        let err = store.read_type_index(&store.abs_path(&p)).unwrap_err();
1863        assert!(matches!(err, StoreError::BadTypeIndex { .. }));
1864    }
1865
1866    // ── find_by_type / find_by_where ─────────────────────────────────────────
1867
1868    fn jsonl_line(path: &str, type_: &str, summary: &str, extra: &str) -> String {
1869        format!(
1870            "{{\"path\":\"{path}\",\"type\":\"{type_}\",\"summary\":\"{summary}\",\"created\":null,\"updated\":null{extra}}}\n"
1871        )
1872    }
1873
1874    #[test]
1875    fn find_by_type_reads_canonical_folder_sidecar() {
1876        let dir = empty_store();
1877        let root = dir.path();
1878        // Canonical folder for `contact` is records/contacts.
1879        write(
1880            root,
1881            "records/contacts/index.jsonl",
1882            &(jsonl_line("records/contacts/sarah.md", "contact", "Sarah", "")
1883                + &jsonl_line("records/contacts/elena.md", "contact", "Elena", "")),
1884        );
1885        // A different type's sidecar must not leak into a contact query.
1886        write(
1887            root,
1888            "records/companies/index.jsonl",
1889            &jsonl_line("records/companies/acme.md", "company", "Acme", ""),
1890        );
1891        let store = open(&dir);
1892        let recs = store.find_by_type("contact").unwrap();
1893        let names: Vec<_> = recs.iter().map(|r| r.summary.clone()).collect();
1894        assert_eq!(names, vec!["Elena".to_string(), "Sarah".to_string()]); // path-sorted
1895        assert!(recs.iter().all(|r| r.type_ == "contact"));
1896    }
1897
1898    #[test]
1899    fn find_by_type_canonical_absent_falls_back_within_the_layer_only() {
1900        let dir = empty_store();
1901        let root = dir.path();
1902        // A custom `proposal` record filed under a non-canonical folder NAME
1903        // (the natural plural `records/proposals/`) inside the records layer.
1904        // `default_type_folder("proposal")` = `records/proposal` (bare type, no
1905        // pluralization guess), so the canonical sidecar does not exist and
1906        // `find_by_type` falls back. The fallback is bounded to the type's
1907        // layer (records), so this record — same layer, non-canonical folder —
1908        // is still found: completeness within the layer holds.
1909        write(
1910            root,
1911            "records/proposals/index.jsonl",
1912            &jsonl_line("records/proposals/p1.md", "proposal", "Q3 proposal", ""),
1913        );
1914        // A DECOY of the SAME type sitting in a DIFFERENT layer (sources/). The
1915        // old whole-store fallback read every sidecar in the store and would
1916        // have leaked this into the result; the layer-bounded fallback must not.
1917        // It also pins that the fallback is O(entities-in-layer), never O(store).
1918        write(
1919            root,
1920            "sources/proposals/index.jsonl",
1921            &jsonl_line(
1922                "sources/proposals/leak.md",
1923                "proposal",
1924                "cross-layer decoy",
1925                "",
1926            ),
1927        );
1928        let store = open(&dir);
1929        let recs = store.find_by_type("proposal").unwrap();
1930        assert_eq!(
1931            recs.len(),
1932            1,
1933            "only the records-layer proposal, not the sources decoy"
1934        );
1935        assert_eq!(recs[0].summary, "Q3 proposal");
1936        assert_eq!(recs[0].path, PathBuf::from("records/proposals/p1.md"));
1937    }
1938
1939    #[test]
1940    fn find_by_type_canonical_absent_does_not_read_other_layers() {
1941        let dir = empty_store();
1942        let root = dir.path();
1943        // `email`'s canonical folder is `sources/emails` (layer Sources). No
1944        // sidecar there yet, so `find_by_type("email")` falls back — but only
1945        // within the Sources layer. A populated sidecar in the Records layer
1946        // must never be touched: the fallback is layer-bounded, not store-wide.
1947        // Under the old `read_all_type_indexes_in(None)` fallback this records
1948        // sidecar would have been read and filtered (wasted O(store) I/O); now
1949        // it is outside the walk root entirely.
1950        write(
1951            root,
1952            "records/contacts/index.jsonl",
1953            &jsonl_line("records/contacts/sarah.md", "contact", "Sarah", ""),
1954        );
1955        let store = open(&dir);
1956        // No email anywhere ⇒ empty, and the records layer was not in scope.
1957        assert!(store.find_by_type("email").unwrap().is_empty());
1958    }
1959
1960    #[test]
1961    fn find_by_where_matches_typed_columns_and_flat_fields() {
1962        let dir = empty_store();
1963        let root = dir.path();
1964        write(
1965            root,
1966            "records/expenses/index.jsonl",
1967            &(jsonl_line(
1968                "records/expenses/a.md",
1969                "expense",
1970                "lunch",
1971                ",\"vendor\":\"acme\",\"tags\":[\"meals\"]",
1972            ) + &jsonl_line(
1973                "records/expenses/b.md",
1974                "expense",
1975                "taxi",
1976                ",\"vendor\":\"yellow\"",
1977            )),
1978        );
1979        write(
1980            root,
1981            "records/contacts/index.jsonl",
1982            &jsonl_line(
1983                "records/contacts/sarah.md",
1984                "contact",
1985                "Sarah",
1986                ",\"tags\":[\"customer\"]",
1987            ),
1988        );
1989        let store = open(&dir);
1990
1991        // Flat field in `fields`.
1992        let by_vendor = store.find_by_where("vendor", "acme").unwrap();
1993        assert_eq!(by_vendor.len(), 1);
1994        assert_eq!(by_vendor[0].path, PathBuf::from("records/expenses/a.md"));
1995
1996        // Typed column: type (spans both expense records).
1997        assert_eq!(store.find_by_where("type", "expense").unwrap().len(), 2);
1998
1999        // Typed list column: tags membership.
2000        let customers = store.find_by_where("tags", "customer").unwrap();
2001        assert_eq!(customers.len(), 1);
2002        assert_eq!(
2003            customers[0].path,
2004            PathBuf::from("records/contacts/sarah.md")
2005        );
2006
2007        // No match → empty.
2008        assert!(store.find_by_where("vendor", "nobody").unwrap().is_empty());
2009    }
2010
2011    #[test]
2012    fn find_by_where_matches_timestamps_across_rfc3339_spellings() {
2013        let dir = empty_store();
2014        let root = dir.path();
2015        // db.md files most commonly carry the `Z` UTC spelling. The index.jsonl
2016        // serialized from such a file preserves it verbatim.
2017        write(
2018            root,
2019            "records/meetings/index.jsonl",
2020            "{\"path\":\"records/meetings/kickoff.md\",\"type\":\"meeting\",\
2021\"summary\":\"kickoff\",\"created\":\"2026-05-01T00:00:00Z\",\
2022\"updated\":\"2026-05-02T09:30:00-07:00\"}\n",
2023        );
2024        let store = open(&dir);
2025
2026        // The exact value an agent reads out of the file (`Z` form) must match.
2027        let by_z = store
2028            .find_by_where("created", "2026-05-01T00:00:00Z")
2029            .unwrap();
2030        assert_eq!(by_z.len(), 1);
2031        assert_eq!(by_z[0].path, PathBuf::from("records/meetings/kickoff.md"));
2032
2033        // The equivalent explicit-offset spelling of the same instant matches too.
2034        assert_eq!(
2035            store
2036                .find_by_where("created", "2026-05-01T00:00:00+00:00")
2037                .unwrap()
2038                .len(),
2039            1
2040        );
2041
2042        // A non-UTC stored value matches both its own offset spelling and the
2043        // same instant expressed as `Z` (instant comparison, not string compare).
2044        assert_eq!(
2045            store
2046                .find_by_where("updated", "2026-05-02T09:30:00-07:00")
2047                .unwrap()
2048                .len(),
2049            1
2050        );
2051        assert_eq!(
2052            store
2053                .find_by_where("updated", "2026-05-02T16:30:00Z")
2054                .unwrap()
2055                .len(),
2056            1
2057        );
2058
2059        // A different instant does not match.
2060        assert!(store
2061            .find_by_where("created", "2026-05-01T00:00:01Z")
2062            .unwrap()
2063            .is_empty());
2064        // A non-RFC3339 query value never matches a real timestamp.
2065        assert!(store
2066            .find_by_where("created", "2026-05-01")
2067            .unwrap()
2068            .is_empty());
2069    }
2070
2071    #[test]
2072    fn find_by_where_in_layer_reads_only_that_layers_sidecars() {
2073        // The O(entities-in-layer) contract: a layer-scoped where read must walk
2074        // ONLY the named layer's subtree. Proven structurally — a *malformed*
2075        // sidecar in another layer would make `read_type_index` error if it were
2076        // read, so a scoped read that succeeds (and excludes that record) is
2077        // proof the other layer's I/O never happened.
2078        let dir = empty_store();
2079        let root = dir.path();
2080        write(
2081            root,
2082            "records/companies/index.jsonl",
2083            &jsonl_line(
2084                "records/companies/acme.md",
2085                "company",
2086                "Acme",
2087                ",\"domain\":\"acme.com\"",
2088            ),
2089        );
2090        // Same field/value in the sources layer — but the sidecar is corrupt.
2091        write(
2092            root,
2093            "sources/emails/index.jsonl",
2094            "{ this is not valid json and would error if read }\n",
2095        );
2096        let store = open(&dir);
2097
2098        // Scoped to records: the corrupt sources sidecar is out of scope, so the
2099        // read succeeds and returns only the records-layer match.
2100        let in_records = store
2101            .find_by_where_in("domain", "acme.com", Some(Layer::Records))
2102            .expect("a records-scoped read must not touch the sources sidecar");
2103        assert_eq!(
2104            rels(
2105                &in_records
2106                    .iter()
2107                    .map(|r| r.path.clone())
2108                    .collect::<Vec<_>>()
2109            ),
2110            vec!["records/companies/acme.md".to_string()]
2111        );
2112
2113        // The store-wide read DOES reach the corrupt sidecar and surfaces it as
2114        // a parse error — confirming the corrupt file is genuinely in the tree
2115        // and that only the layer scope spares it.
2116        let store_wide = store.find_by_where("domain", "acme.com");
2117        assert!(
2118            matches!(store_wide, Err(StoreError::BadTypeIndex { .. })),
2119            "unscoped read walks every layer and hits the corrupt sidecar"
2120        );
2121
2122        // Scoping to the layer that holds only the corrupt sidecar still errors
2123        // (the scope includes it), proving the scope is a real subtree bound and
2124        // not a silent "skip anything that fails".
2125        let in_sources = store.find_by_where_in("domain", "acme.com", Some(Layer::Sources));
2126        assert!(matches!(in_sources, Err(StoreError::BadTypeIndex { .. })));
2127    }
2128
2129    #[test]
2130    fn find_by_where_in_missing_layer_is_empty_not_an_error() {
2131        // A layer-scoped read over a layer folder that does not exist yet must
2132        // return empty (mirrors `walk_layer`'s missing-dir guard), never a walk
2133        // error from `ignore` over a nonexistent path.
2134        let dir = empty_store();
2135        let root = dir.path();
2136        write(
2137            root,
2138            "records/contacts/index.jsonl",
2139            &jsonl_line(
2140                "records/contacts/sarah.md",
2141                "contact",
2142                "Sarah",
2143                ",\"city\":\"denver\"",
2144            ),
2145        );
2146        let store = open(&dir);
2147
2148        // `wiki/` was never created.
2149        let in_wiki = store
2150            .find_by_where_in("city", "denver", Some(Layer::Wiki))
2151            .expect("missing layer subtree is empty, not an error");
2152        assert!(in_wiki.is_empty());
2153
2154        // Same query scoped to the layer that has the record still finds it.
2155        let in_records = store
2156            .find_by_where_in("city", "denver", Some(Layer::Records))
2157            .unwrap();
2158        assert_eq!(in_records.len(), 1);
2159    }
2160
2161    // ── abs_path / rel_path ──────────────────────────────────────────────────
2162
2163    #[test]
2164    fn abs_and_rel_path_roundtrip() {
2165        let dir = empty_store();
2166        let store = open(&dir);
2167        let rel = Path::new("records/contacts/sarah.md");
2168        let abs = store.abs_path(rel);
2169        assert_eq!(abs, dir.path().join(rel));
2170        assert_eq!(store.rel_path(&abs).as_deref(), Some(rel));
2171
2172        // An absolute path is passed through unchanged by abs_path.
2173        assert_eq!(store.abs_path(&abs), abs);
2174
2175        // A path outside the store has no store-relative form.
2176        assert_eq!(store.rel_path(Path::new("/somewhere/else.md")), None);
2177    }
2178
2179    // ── infer_type_from_path (inverse of default_type_folder) ────────────────
2180
#[test]
fn infer_type_maps_every_recognized_folder_back_to_its_type() {
    // Each row pairs a store-relative file path with the type its folder
    // is expected to infer.
    let table: &[(&str, &str)] = &[
        ("sources/emails/x.md", "email"),
        ("sources/transcripts/x.md", "transcript"),
        ("sources/docs/x.md", "pdf-source"),
        ("records/contacts/x.md", "contact"),
        ("records/companies/x.md", "company"),
        ("records/expenses/x.md", "expense"),
        ("records/meetings/x.md", "meeting"),
        ("records/decisions/x.md", "decision"),
        ("records/invoices/x.md", "invoice"),
        // The topic name is irrelevant under `wiki/`: every sub-folder
        // infers `wiki-page`.
        ("wiki/topics/x.md", "wiki-page"),
        ("wiki/pricing/x.md", "wiki-page"),
    ];

    table.iter().for_each(|&(path, expected)| {
        let inferred = infer_type_from_path(Path::new(path));
        assert_eq!(
            inferred.as_deref(),
            Some(expected),
            "path {path} should infer type {expected}"
        );
    });
}
2205
#[test]
fn infer_type_round_trips_with_default_type_folder() {
    // Invariant under test: `infer_type_from_path` undoes
    // `default_type_folder`. For each recognized type we go type → folder →
    // file-in-folder → inferred type and expect to land where we started.
    // `wiki-page` is the only many-to-one mapping (any topic folder infers
    // `wiki-page`), yet its own forward folder must still come back intact.
    let recognized = [
        "email",
        "transcript",
        "pdf-source",
        "contact",
        "company",
        "expense",
        "meeting",
        "decision",
        "invoice",
        "wiki-page",
    ];

    for type_ in recognized.iter().copied() {
        let folder = default_type_folder(type_);
        let sample = folder.join("x.md");
        let inferred = infer_type_from_path(&sample);
        assert_eq!(
            inferred.as_deref(),
            Some(type_),
            "recognized type {type_} (folder {folder:?}) must round-trip"
        );
    }
}
2235
#[test]
fn infer_type_round_trips_custom_types_verbatim_no_singularization() {
    // Guards against the CLI/core divergence regressing. For a type it does
    // not recognize, `default_type_folder` falls back to the BARE type name
    // (`task → records/task`, `tasks → records/tasks`). If inference were to
    // singularize, `records/tasks` would come back as `task` and collide
    // with `default_type_folder("task") → records/task`, breaking the
    // round-trip for custom types.
    let customs = ["task", "tasks", "playbook", "process", "okrs", "ticket"];
    for custom in customs.iter().copied() {
        // Forward direction: the folder is `records/<type>` verbatim.
        let folder = default_type_folder(custom);
        let verbatim_folder = PathBuf::from("records").join(custom);
        assert_eq!(folder, verbatim_folder);

        // Reverse direction: a file in that folder infers the same name.
        let sample = folder.join("x.md");
        let inferred = infer_type_from_path(&sample);
        assert_eq!(
            inferred.as_deref(),
            Some(custom),
            "custom type {custom} must round-trip verbatim (no singularization)"
        );
    }

    // The exact case from the finding: the plural folder keeps its trailing
    // `s` and must not collapse to `task`.
    let plural = infer_type_from_path(Path::new("records/tasks/x.md"));
    assert_eq!(
        plural.as_deref(),
        Some("tasks"),
        "records/tasks must infer `tasks`, not `task`"
    );
}
2262
#[test]
fn infer_type_requires_three_component_layer_folder_file_shape() {
    // Shorthand so each case reads as a single path literal.
    let infer = |raw: &str| infer_type_from_path(Path::new(raw));

    // Under three components there is no type-folder segment: a file that
    // sits directly in a layer (or at the root) infers nothing, matching
    // the old CLI contract.
    assert_eq!(infer("records/x.md"), None);
    assert_eq!(infer("sources/x.md"), None);
    assert_eq!(infer("wiki/x.md"), None);
    assert_eq!(infer("x.md"), None);

    // A leading segment that is not a known layer never infers a type.
    assert_eq!(infer("foo/bar/x.md"), None);

    // Extra depth is fine: the first type-folder segment decides, as with a
    // sharded record at records/expenses/2026/05/x.md.
    let sharded = infer("records/expenses/2026/05/x.md");
    assert_eq!(sharded.as_deref(), Some("expense"));
}
2280}