dbmd_core/store.rs
1//! `store` — walk, locate, and shard a db.md store.
2//!
//! A db.md store is one directory marked by an uppercase `DB.md` at its root.
//! [`Store::open_strict`] is the gate normal store-walking subcommands go
//! through ([`Store::open`] is its lenient, validation-oriented sibling that
//! tolerates a damaged config); in both, a missing `DB.md` is the
//! [`NotAStore`] error (`NOT_A_STORE`). The toolkit never guesses a store root.
7//!
8//! Scale discipline lives here: [`Store::walk`] and the layer/type-folder
9//! walks are **SWEEP** primitives used only by `validate --all`,
10//! `index rebuild`, and `stats`. The interactive loop instead uses
11//! [`Store::find_links_to`] / [`Store::find_links_to_any`] (a single
12//! presence-only content scan) and the `index.jsonl` sidecar readers
13//! ([`Store::find_by_type`] / [`Store::find_by_where`] /
14//! [`Store::read_type_index`]) — never a whole-store parse. The batch
15//! [`Store::find_links_to_any`] is what keeps the working-set validate's
16//! incoming-linker discovery a single store scan rather than one scan per
17//! changed object.
18//!
19//! Link edges are defined once, here, by the shared [`extract_edge_targets`] /
20//! [`canonical_link_target`] / [`link_edge_key`] helpers (fence-aware,
21//! whitespace-trimmed, case-folded to the filesystem), so the forward view
22//! (`graph::forwardlinks`), the backward view ([`Store::find_links_to_any`]),
23//! `rename`, and `validate` all agree on exactly which `[[...]]` is an edge.
24//! [`ensure_path_within_store`] is the within-store containment gate every
25//! caller-influenced path passes through before it is read or traversed.
26
27use std::collections::BTreeMap;
28use std::path::{Path, PathBuf};
29use std::time::{SystemTime, UNIX_EPOCH};
30
31use chrono::{DateTime, Datelike, FixedOffset};
32use ignore::WalkBuilder;
33
34use crate::index::IndexRecord;
35use crate::parser::{parse_db_md, Config, Frontmatter};
36
/// Basenames that are never content files inside a layer. Currently this is
/// only the curator-maintained `index.md` catalog; the store walks skip it so a
/// SWEEP over the content layers never mistakes a catalog for a record.
///
/// Only `index.md` is excluded by basename, because the content walks traverse
/// the layer dirs (`sources/`/`records/`) and `index.md` is the only
/// meta file that appears INSIDE them. The root `DB.md` / `log.md` (and the
/// `log/` archive) live at the store root, outside every layer, so they are
/// never reached by these walks — and a content file that merely happens to be
/// named `DB.md` or `log.md` inside a layer (e.g. `records/docs/DB.md`) is real
/// content the SPEC does NOT reserve at type-folder depth.
const NON_CONTENT_BASENAMES: [&str; 1] = ["index.md"];
49
/// File name of the per-type-folder sidecar (`index.jsonl`): the complete
/// machine twin that backs every structured read. Each non-blank line is one
/// JSON-encoded [`IndexRecord`] (see [`Store::read_type_index`]).
const TYPE_INDEX_FILE: &str = "index.jsonl";
52
/// Returned when a path is opened as a store but has no `DB.md` at its root.
/// Surfaced as the structured code `NOT_A_STORE` with a non-zero exit.
///
/// Produced by [`Store::open`] and [`Store::open_strict`] when
/// [`Store::is_db_md_store`] rejects the path.
#[derive(Debug, thiserror::Error)]
#[error("not a db.md store: {path} has no DB.md")]
pub struct NotAStore {
    /// The path that was inspected.
    pub path: PathBuf,
}
61
/// Errors from store-level operations (walk, locate, shard, sidecar read).
#[derive(Debug, thiserror::Error)]
pub enum StoreError {
    /// A sidecar `index.jsonl` could not be read or parsed. For a parse
    /// failure the message carries the 1-based line number of the bad line.
    #[error("failed to read type index {path}: {message}")]
    BadTypeIndex {
        /// The sidecar file.
        path: PathBuf,
        /// What went wrong.
        message: String,
    },

    /// A required date field for sharding was absent or unparseable, and there
    /// was no usable fallback.
    #[error("cannot compute shard path for {file}: no usable date field")]
    NoShardDate {
        /// The file being placed.
        file: PathBuf,
    },

    /// A store walk or content scan failed to start or run: either the
    /// directory walker reported an error, or a file could not be read during
    /// the incoming-link scan.
    #[error("search failed under {root}: {message}")]
    Search {
        /// The root the scan ran under.
        root: PathBuf,
        /// What went wrong.
        message: String,
    },

    /// An underlying I/O failure.
    #[error(transparent)]
    Io(#[from] std::io::Error),
}
95
/// The two canonical layers of a db.md store: `sources/` and `records/`.
///
/// `Ord`/`PartialOrd` are derived (additively) because sibling modules key
/// `BTreeMap`s on `Layer` (e.g. `stats::Stats::files_per_layer`); the canonical
/// declaration order (`Sources` < `Records`) is the sort order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Layer {
    /// `sources/` — raw evidence (documentary + testimonial); immutable; date-sharded at scale.
    Sources,
    /// `records/` — everything the agent authors; meta-typed fact/operational/conclusion; entity types flat, event types sharded.
    Records,
}

impl Layer {
    /// The on-disk folder name for this layer (`"sources"` / `"records"`).
    pub fn dir_name(self) -> &'static str {
        match self {
            Layer::Sources => "sources",
            Layer::Records => "records",
        }
    }

    /// Parse a layer from its folder name; `None` for anything else.
    ///
    /// The match is exact (case-sensitive) against [`Layer::dir_name`], so the
    /// name ↔ layer mapping is spelled out in one place only.
    pub fn from_dir_name(name: &str) -> Option<Self> {
        Layer::all()
            .iter()
            .copied()
            .find(|layer| layer.dir_name() == name)
    }

    /// Every layer, in canonical order.
    pub fn all() -> [Layer; 2] {
        [Layer::Sources, Layer::Records]
    }
}
132
/// An opened db.md store: its root path plus the parsed `DB.md` [`Config`].
///
/// Construct via [`Store::open_strict`] (config must read and parse) or the
/// lenient [`Store::open`] (a damaged config falls back to the default); both
/// confirm the `DB.md` marker first so downstream code can assume a real store.
#[derive(Debug, Clone)]
pub struct Store {
    /// The store root (the directory containing `DB.md`).
    pub root: PathBuf,
    /// The parsed `DB.md` config (agent instructions, policies, schemas).
    pub config: Config,
}
144
145impl Store {
146 /// True if `path` is a db.md store root: an uppercase `DB.md` file exists
147 /// at `path`. On case-sensitive filesystems a lowercase `db.md` must NOT
148 /// count (the lowercase name refers to the project/spec, not the marker).
149 pub fn is_db_md_store(path: &Path) -> bool {
150 // Read the directory and match the *stored* filename byte-for-byte.
151 // `path.join("DB.md").exists()` would lie on a case-insensitive
152 // filesystem (macOS default), where a lowercase `db.md` answers a
153 // `DB.md` probe. `read_dir` returns the real on-disk name, so the
154 // exact-match check is correct on both case-sensitive (Linux) and
155 // case-insensitive filesystems.
156 let entries = match std::fs::read_dir(path) {
157 Ok(entries) => entries,
158 Err(_) => return false,
159 };
160 for entry in entries.flatten() {
161 if entry.file_name() == "DB.md" {
162 // A directory literally named `DB.md` is not the marker.
163 match entry.file_type() {
164 Ok(ft) if ft.is_dir() => return false,
165 Ok(_) => return true,
166 Err(_) => return false,
167 }
168 }
169 }
170 false
171 }
172
173 /// Open `path` as a db.md store and require `DB.md` to be readable and
174 /// parseable. Normal commands should enter through this strict gate so a
175 /// damaged config cannot silently disable schema or policy rules.
176 pub fn open_strict(path: &Path) -> crate::Result<Store> {
177 if !Store::is_db_md_store(path) {
178 return Err(NotAStore {
179 path: path.to_path_buf(),
180 }
181 .into());
182 }
183 let db_md = path.join("DB.md");
184 let text = std::fs::read_to_string(&db_md)?;
185 let config = parse_db_md(&text, &db_md)?;
186 Ok(Store {
187 root: path.to_path_buf(),
188 config,
189 })
190 }
191
192 /// Open `path` as a db.md store: confirm the `DB.md` marker (else
193 /// [`NotAStore`]) and parse the `DB.md` config when possible. This is the
194 /// lenient validation-oriented open path: a damaged `DB.md` still marks the
195 /// directory as a store so `dbmd validate` can report the config error as an
196 /// issue. Normal CLI commands should use [`Store::open_strict`] instead.
197 pub fn open(path: &Path) -> Result<Store, NotAStore> {
198 if !Store::is_db_md_store(path) {
199 return Err(NotAStore {
200 path: path.to_path_buf(),
201 });
202 }
203 let db_md = path.join("DB.md");
204 // The marker exists; parse its config. A read or parse failure leaves
205 // the store openable with default config rather than masquerading as
206 // NOT_A_STORE — the marker is present, so this *is* a store; a damaged
207 // DB.md is `dbmd validate`'s job to report, not `open`'s.
208 let config = match std::fs::read_to_string(&db_md) {
209 Ok(text) => parse_db_md(&text, &db_md).unwrap_or_default(),
210 Err(_) => Config::default(),
211 };
212 Ok(Store {
213 root: path.to_path_buf(),
214 config,
215 })
216 }
217
218 /// **SWEEP.** Recursively iterate every `.md` content file across
219 /// `sources/` and `records/`, skipping hidden dirs and `log/`.
220 /// Used only by `validate --all`, `index rebuild`, and `stats` — never on
221 /// the interactive loop.
222 pub fn walk(&self) -> Result<Vec<PathBuf>, StoreError> {
223 // Only the three content layers — never root meta files (`DB.md`,
224 // `index.md`, `log.md`) and never `log/`, which live at root and are
225 // outside every layer dir.
226 let mut out = Vec::new();
227 for layer in Layer::all() {
228 out.extend(self.walk_layer(layer)?);
229 }
230 out.sort();
231 Ok(out)
232 }
233
234 /// **SWEEP.** Like [`Store::walk`] but scoped to a single layer.
235 pub fn walk_layer(&self, layer: Layer) -> Result<Vec<PathBuf>, StoreError> {
236 let layer_root = self.root.join(layer.dir_name());
237 if !layer_root.is_dir() {
238 return Ok(Vec::new());
239 }
240 self.walk_content_md(&layer_root)
241 }
242
243 /// Enumerate every `.md` file in a single type-folder, **recursing through
244 /// its date-shards** (`sources/emails/**/*.md`). The unit the index builder
245 /// and per-folder rebuild operate on. SWEEP-class (scoped to one folder).
246 pub fn walk_type_folder(&self, type_folder: &Path) -> Result<Vec<PathBuf>, StoreError> {
247 let abs = self.resolve_under_root(type_folder);
248 if !abs.is_dir() {
249 return Ok(Vec::new());
250 }
251 self.walk_content_md(&abs)
252 }
253
254 /// The ≤`n` most-recent files in a type-folder by frontmatter `updated`
255 /// (descending), ties broken by store-relative path (ascending) — a total
256 /// order, so write-through and rebuild never disagree on #500 vs #501.
257 ///
258 /// Reads `updated` across the folder's shards — a SWEEP cost absorbed into
259 /// `index rebuild`. The write-through path never calls this. The
260 /// cap-selection primitive for the 500-entry `index.md` browse view.
261 pub fn recent_in_type_folder(
262 &self,
263 type_folder: &Path,
264 n: usize,
265 ) -> Result<Vec<PathBuf>, StoreError> {
266 let files = self.walk_type_folder(type_folder)?;
267 // (updated, rel-path) for each file. Files missing/unparseable
268 // `updated` sort *after* dated ones (None last), then by path — so they
269 // are deterministically the lowest-priority candidates for the cap, not
270 // dropped silently. The total order (updated desc, path asc) is what
271 // keeps write-through and rebuild agreeing on #500 vs #501.
272 let mut keyed: Vec<(Option<DateTime<FixedOffset>>, PathBuf)> = files
273 .into_iter()
274 .map(|rel| {
275 let updated = self.read_updated(&self.abs_path(&rel));
276 (updated, rel)
277 })
278 .collect();
279 keyed.sort_by(|a, b| {
280 // `updated` descending: newest first. `None` is treated as the
281 // oldest possible, so dated files always win a cap slot over
282 // undated ones.
283 let by_updated = b.0.cmp(&a.0);
284 by_updated.then_with(|| a.1.cmp(&b.1))
285 });
286 keyed.truncate(n);
287 Ok(keyed.into_iter().map(|(_, rel)| rel).collect())
288 }
289
290 /// The shard/flat predicate: true if the type date-shards, false if it
291 /// stays flat. True for source types and event record types
292 /// (`expense`/`invoice`/`meeting` + custom `order`/`ticket`/`transaction`),
293 /// or when `DB.md ## Schemas` declares `shard: by-date`. False for
294 /// dedup-bounded entity types (`contact`/`company`/`decision`) and
295 /// conclusion records (`profile`/`concept`/`synthesis`).
296 pub fn type_shards(&self, type_: &str) -> bool {
297 // A `DB.md ## Schemas` `### <type>` block with a `shard:` directive is
298 // authoritative — it is the v0.2 generic-model way to declare sharding,
299 // so it overrides the built-in default below (in either direction).
300 if let Some(shard) = self.config.schemas.get(type_).and_then(|s| s.shard) {
301 return shard;
302 }
303 // Built-in default for the example types. Sharding is a property of the
304 // *type*:
305 // - source types carry a primary date field and shard;
306 // - event record types track business volume and shard;
307 // - dedup-bounded entity types and curation-bounded conclusion
308 // records (`profile`/`concept`/`synthesis`) stay flat.
309 // Any type can override this via a `shard:` directive (above).
310 matches!(
311 type_,
312 // source types (documentary + testimonial)
313 "email" | "transcript" | "pdf-source" | "note"
314 // event record types (canonical)
315 | "expense" | "invoice" | "meeting"
316 // event record types (recognized custom, per the plan)
317 | "order" | "ticket" | "transaction"
318 )
319 }
320
321 /// Compute the canonical write path for a new file. For a sharding type
322 /// (per [`Store::type_shards`]) insert `<YYYY>/<MM>/` from the type's
323 /// primary date field (`email.date`, `expense.date`, … fallback `created`)
324 /// under the type folder; flat types (entity + conclusion records) get no
325 /// shard segment.
326 /// Deterministic + stable: same input → same path, so a record never moves
327 /// once written.
328 pub fn shard_path_for(
329 &self,
330 type_: &str,
331 frontmatter: &Frontmatter,
332 name: &str,
333 ) -> Result<PathBuf, StoreError> {
334 self.shard_path_in(&default_type_folder(type_), type_, frontmatter, name)
335 }
336
337 /// Like [`Store::shard_path_for`], but compute the path under an explicit,
338 /// caller-resolved type-folder rather than the canonical default. This lets a
339 /// write surface honour an agent-supplied conforming sub-folder — e.g. a
340 /// conclusion record filed under `records/profiles/`, `records/concepts/`, or
341 /// `records/synthesis/` (a conclusion record may be filed under ANY
342 /// `records/<folder>/`, not only its canonical one) — while still applying
343 /// date-sharding for sharding types. The folder must be a conforming
344 /// `<layer>/<type-folder>` (2
345 /// components, recognized layer); the caller is responsible for that (see the
346 /// CLI's `resolve_write_path`), so it is taken as given here.
347 ///
348 /// Sharding is still a property of the *type*: a sharding type gets the
349 /// `<YYYY>/<MM>` segment under `folder`; a flat type lands directly in it.
350 pub fn shard_path_in(
351 &self,
352 folder: &Path,
353 type_: &str,
354 frontmatter: &Frontmatter,
355 name: &str,
356 ) -> Result<PathBuf, StoreError> {
357 let folder = folder.to_path_buf();
358 let filename = ensure_md_extension(name);
359
360 if !self.type_shards(type_) {
361 // Flat type (entity records, conclusion records, decisions): no
362 // shard segment.
363 return Ok(folder.join(filename));
364 }
365
366 // Sharding type: derive <YYYY>/<MM> from the primary date field, with
367 // `created` as the universal fallback. Reading the public `Frontmatter`
368 // fields directly (typed `created`/`updated` + raw `extra`) avoids the
369 // not-yet-implemented `Frontmatter::get`/`parse` and keeps this pure.
370 let (year, month) = self
371 .primary_shard_segment(type_, frontmatter)
372 .ok_or_else(|| StoreError::NoShardDate {
373 file: folder.join(&filename),
374 })?;
375
376 Ok(folder.join(year).join(month).join(filename))
377 }
378
379 /// Find files with an incoming wiki-link to `target` via a **single
380 /// presence-only content scan** for an edge to `target` across all layers,
381 /// using the shared fence-aware/whitespace-trimmed/case-folded edge notion
382 /// ([`extract_edge_targets`]). Loop-fast; no whole-graph build. Returns
383 /// store-relative paths.
384 pub fn find_links_to(&self, target: &Path) -> Result<Vec<PathBuf>, StoreError> {
385 // A single target is just the degenerate batch case — one key, one store
386 // scan. Routing through `find_links_to_any` keeps the
387 // pattern construction and the scan loop in exactly one place. The
388 // batch API takes `&[PathBuf]`, so the one-element slice is owned (a
389 // single alloc on this single-target convenience path; the batch path
390 // validate.rs rides is untouched).
391 self.find_links_to_any(&[target.to_path_buf()])
392 }
393
394 /// Find every file with an incoming wiki-link to **any** of `targets`, in a
395 /// **single content pass** over the store (one `.md` walk, one presence-only
396 /// edge scan per file). This is the batch incoming-linker finder the
397 /// working-set [`crate::validate::validate_working_set`] sits on: it must find
398 /// the linkers for the *whole* changed set without paying a full store read
399 /// per changed object. Cost is therefore one store scan (O(store)), NOT
400 /// `targets.len() × store` — calling [`find_links_to`](Self::find_links_to)
401 /// in a loop would reread every `.md` once per target and is the exact
402 /// `O(changed × store)` blow-up this method exists to prevent. Returns
403 /// store-relative paths (deduped, sorted).
404 ///
405 /// **One edge notion with `forwardlinks`/`rename`/`validate`.** A file links
406 /// to a target iff [`extract_edge_targets`] (fence-aware, whitespace-trimmed)
407 /// of its content yields a target whose [`link_edge_key`] equals the target's
408 /// — the *same* definition the forward view and the rename rewriter use. The
409 /// previous implementation used a literal-adjacency ripgrep regex that (a)
410 /// matched `[[...]]` text inside fenced code examples (which validate treats
411 /// as non-edges), (b) missed inner-whitespace padding (`[[ x ]]`), and (c)
412 /// compared case-sensitively even where the filesystem resolves links
413 /// case-insensitively — so backlinks/links/rename silently disagreed with
414 /// forwardlinks and validate. Reading content and routing through the shared
415 /// extractor removes all three divergences.
416 ///
417 /// Why content scan and not the sidecar `links` field: the sidecar projects
418 /// only the frontmatter `links:` array, so it misses edges written in the
419 /// body or in typed fields (`company: [[…]]`). Finding an incoming link to an
420 /// arbitrary path therefore requires reading file content.
421 pub fn find_links_to_any(&self, targets: &[PathBuf]) -> Result<Vec<PathBuf>, StoreError> {
422 // Build the set of comparison keys for the requested targets, in the
423 // canonical (case-folded where the filesystem is case-insensitive) form
424 // the edge extractor emits. An empty key (a target that renders to no
425 // link text, e.g. `""` or `"./"`) contributes nothing — and crucially the
426 // empty set short-circuits below so we never report every file.
427 let want: std::collections::HashSet<String> = targets
428 .iter()
429 .filter_map(|t| {
430 let canonical = canonical_link_target(&t.to_string_lossy());
431 if canonical.is_empty() {
432 None
433 } else {
434 Some(link_edge_key(&canonical))
435 }
436 })
437 .collect();
438 if want.is_empty() {
439 return Ok(Vec::new());
440 }
441
442 let mut hits = std::collections::BTreeSet::new();
443 // Scan every `.md` file in the store (skip hidden + `log/`), including
444 // `index.md` catalogs — an incoming reference is wherever the link text
445 // lives; the caller decides relevance. ONE walk for the whole target set;
446 // per file we stop at the first matching edge (presence is all we need),
447 // so a file that links to several targets is read once, not once per
448 // target.
449 for rel in self.walk_all_md()? {
450 let abs = self.abs_path(&rel);
451 // Read lossily: a `.md` verbatim-ingested into `sources/` can carry a
452 // stray non-UTF-8 byte (a mis-decoded Latin-1 import). Decoding
453 // lossily substitutes replacement characters instead of erroring, so
454 // one bad byte on a link-bearing line no longer aborts the whole
455 // store scan (the historical `UTF8`-sink failure). The link syntax is
456 // ASCII, so a replacement char elsewhere on the line never hides a
457 // `[[...]]`. A read error (not a decode error) is genuine I/O trouble
458 // and propagates.
459 let bytes = match std::fs::read(&abs) {
460 Ok(b) => b,
461 Err(e) => {
462 return Err(StoreError::Search {
463 root: self.root.clone(),
464 message: format!("read failed in {}: {e}", abs.display()),
465 })
466 }
467 };
468 let text = String::from_utf8_lossy(&bytes);
469 for target in extract_edge_targets(&text) {
470 if want.contains(&link_edge_key(&target)) {
471 hits.insert(rel);
472 break;
473 }
474 }
475 }
476 Ok(hits.into_iter().collect())
477 }
478
479 /// Candidate set for a `type` query: read every type-folder `index.jsonl`
480 /// sidecar in the type's single layer and return the records of that
481 /// `type`. Complete and cold-cache-proof — NOT a walk-and-parse or a
482 /// frontmatter ripgrep scan, and **never a store-wide read**.
483 ///
484 /// The read is bounded to the type's one layer subtree
485 /// (O(entities-in-layer)): a type lives in exactly one layer, and
486 /// `default_type_folder` always encodes it (recognized → its SPEC layer;
487 /// unrecognized → `records/`), so the walk never fans out across every
488 /// sidecar in the store and stays inside the interactive loop's
489 /// O(entities) contract.
490 ///
491 /// The whole-layer read — rather than reading only the type's canonical
492 /// folder sidecar when it happens to exist — is what makes the result
493 /// *complete*. A single `type` can legitimately be filed across several
494 /// folders within its layer: a conclusion `profile` filed under any
495 /// `records/<folder>/`, or a `contact` filed in `records/clients/` alongside
496 /// the canonical `records/contacts/`. The previous code read only the
497 /// canonical-guess sidecar whenever it was a file, which silently dropped
498 /// those non-canonical records the moment the canonical sidecar existed —
499 /// returning an incomplete set, and a *different* set as the store grew
500 /// (the omission flipped on once one canonical record was added). That
501 /// broke the dedup/enumeration premise this primitive backs and disagreed
502 /// with `find_by_where_in`, which already walks the whole layer. Filtering
503 /// the layer read by `type` keeps the result complete regardless of how the
504 /// type's records are foldered.
505 pub fn find_by_type(&self, type_: &str) -> Result<Vec<IndexRecord>, StoreError> {
506 let canonical_folder = default_type_folder(type_);
507 let records = self.read_all_type_indexes_in(layer_of_folder(&canonical_folder))?;
508 Ok(records.into_iter().filter(|r| r.type_ == type_).collect())
509 }
510
511 /// Candidate set for a `key=value` frontmatter query, **store-wide**: read
512 /// every type-folder `index.jsonl` sidecar and filter their records. The
513 /// unscoped pre-write dedup primitive; prefer [`Store::find_by_where_in`]
514 /// with a layer scope to stay O(entities-in-layer) on the interactive loop.
515 pub fn find_by_where(&self, key: &str, value: &str) -> Result<Vec<IndexRecord>, StoreError> {
516 self.find_by_where_in(key, value, None)
517 }
518
519 /// Candidate set for a `key=value` frontmatter query, **scoped to one
520 /// layer** when `layer` is `Some`: the sidecar walk is confined to that
521 /// layer's subtree (`<root>/<layer>/`), so the I/O is O(entities-in-layer),
522 /// not O(store records). `None` keeps the store-wide read.
523 ///
524 /// This is what makes `--in <layer>` an I/O scope, not just a result
525 /// filter: a `--where`-only query (no `--type`) used to read every sidecar
526 /// in the store and narrow by layer in memory, breaking the O(entities)
527 /// contract the interactive loop depends on. With a layer in hand we walk
528 /// only that layer's sidecars.
529 pub fn find_by_where_in(
530 &self,
531 key: &str,
532 value: &str,
533 layer: Option<Layer>,
534 ) -> Result<Vec<IndexRecord>, StoreError> {
535 // A `key=value` query can target any frontmatter field across any type,
536 // so within the chosen subtree we still read every type-folder sidecar
537 // and filter. The layer (when given) bounds *which* subtree, turning a
538 // whole-store walk into a single-layer walk.
539 let records = self.read_all_type_indexes_in(layer)?;
540 Ok(records
541 .into_iter()
542 .filter(|r| record_matches_field(r, key, value))
543 .collect())
544 }
545
546 /// Every record across the type-folder `index.jsonl` sidecars, scoped to one
547 /// layer when `layer` is `Some` (the walk is confined to `<root>/<layer>/`)
548 /// else store-wide. Sequential, complete sidecar reads — never a
549 /// walk-and-parse of the content tree.
550 ///
551 /// This is the unfiltered sidecar-enumeration primitive the relationship
552 /// loop sits on: [`crate::graph::backlinks_filtered`] uses it to bound its
553 /// candidate set to the relevant layer (or the whole store) without opening
554 /// the content tree, then confirms each candidate's edge by parsing the file.
555 pub fn sidecar_records(&self, layer: Option<Layer>) -> Result<Vec<IndexRecord>, StoreError> {
556 self.read_all_type_indexes_in(layer)
557 }
558
559 /// Parse a type-folder's `index.jsonl` into [`IndexRecord`]s, applying
560 /// last-write-wins by `path` over any un-compacted lines. The sidecar-read
561 /// primitive every structured query sits on.
562 pub fn read_type_index(&self, index_jsonl: &Path) -> Result<Vec<IndexRecord>, StoreError> {
563 let text = std::fs::read_to_string(index_jsonl).map_err(|e| StoreError::BadTypeIndex {
564 path: index_jsonl.to_path_buf(),
565 message: e.to_string(),
566 })?;
567
568 // Last-write-wins by `path` over un-compacted lines: a later line for
569 // the same path supersedes an earlier one (the jsonl is append-mostly
570 // and only compacted on rebuild). Blank lines are skipped; a non-blank
571 // line that is not a valid IndexRecord is a hard parse error.
572 let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
573 for (i, line) in text.lines().enumerate() {
574 let trimmed = line.trim();
575 if trimmed.is_empty() {
576 continue;
577 }
578 let record: IndexRecord =
579 serde_json::from_str(trimmed).map_err(|e| StoreError::BadTypeIndex {
580 path: index_jsonl.to_path_buf(),
581 message: format!("line {}: {e}", i + 1),
582 })?;
583 by_path.insert(record.path.clone(), record);
584 }
585 // BTreeMap keyed by path → records emerge sorted by path ascending,
586 // a deterministic order independent of line order in the file.
587 Ok(by_path.into_values().collect())
588 }
589
590 /// Resolve a store-relative path to its absolute on-disk path under
591 /// [`root`](Store::root).
592 pub fn abs_path(&self, store_relative: &Path) -> PathBuf {
593 // `Path::join` returns `store_relative` unchanged if it is already
594 // absolute, so passing an absolute path through is a no-op.
595 self.root.join(store_relative)
596 }
597
598 /// Convert an absolute path under the store into its store-relative form.
599 pub fn rel_path(&self, abs: &Path) -> Option<PathBuf> {
600 abs.strip_prefix(&self.root).ok().map(|p| p.to_path_buf())
601 }
602
603 // ── Private helpers ─────────────────────────────────────────────────────
604
605 /// Resolve a caller-supplied folder path (store-relative or absolute) to an
606 /// absolute path under the store root.
607 fn resolve_under_root(&self, folder: &Path) -> PathBuf {
608 if folder.is_absolute() {
609 folder.to_path_buf()
610 } else {
611 self.root.join(folder)
612 }
613 }
614
615 /// Walk a subtree for content `.md` files (skip hidden dirs, skip `index.md`
616 /// / `DB.md` / `log.md`), returning store-relative paths. Used by the layer
617 /// and type-folder walks.
618 fn walk_content_md(&self, root: &Path) -> Result<Vec<PathBuf>, StoreError> {
619 let mut out = Vec::new();
620 for entry in self.md_walker(root).build() {
621 let entry = entry.map_err(|e| StoreError::Search {
622 root: root.to_path_buf(),
623 message: e.to_string(),
624 })?;
625 if !is_file_entry(&entry) {
626 continue;
627 }
628 let path = entry.path();
629 if !has_md_extension(path) {
630 continue;
631 }
632 if is_non_content_basename(path) {
633 continue;
634 }
635 if let Some(rel) = self.rel_path(path) {
636 out.push(rel);
637 }
638 }
639 out.sort();
640 Ok(out)
641 }
642
643 /// Walk the whole store for **every** `.md` file (including `index.md`),
644 /// skipping hidden dirs and the `log/` archive tree. Used by the backlink
645 /// scan, where the literal link text can live in any markdown file.
646 fn walk_all_md(&self) -> Result<Vec<PathBuf>, StoreError> {
647 let mut out = Vec::new();
648 for entry in self.md_walker(&self.root).build() {
649 let entry = entry.map_err(|e| StoreError::Search {
650 root: self.root.clone(),
651 message: e.to_string(),
652 })?;
653 if !is_file_entry(&entry) {
654 continue;
655 }
656 let path = entry.path();
657 if !has_md_extension(path) {
658 continue;
659 }
660 if self.is_in_log_dir(path) {
661 continue;
662 }
663 if let Some(rel) = self.rel_path(path) {
664 out.push(rel);
665 }
666 }
667 out.sort();
668 Ok(out)
669 }
670
671 /// Read and merge every type-folder `index.jsonl` sidecar under `layer`
672 /// when given, else the whole store (skip hidden + `log/`). Each sidecar is
673 /// read with last-write-wins by path; across sidecars, paths are disjoint by
674 /// construction (one sidecar per folder), so a plain concatenation preserves
675 /// completeness. A layer scope confines the walk to `<root>/<layer>/`, which
676 /// is what keeps `find_by_where_in` O(entities-in-layer).
677 fn read_all_type_indexes_in(
678 &self,
679 layer: Option<Layer>,
680 ) -> Result<Vec<IndexRecord>, StoreError> {
681 let mut out = Vec::new();
682 for sidecar in self.find_type_index_files_in(layer)? {
683 out.extend(self.read_type_index(&self.abs_path(&sidecar))?);
684 }
685 Ok(out)
686 }
687
688 /// Locate every `index.jsonl` sidecar under `layer` (when given) else the
689 /// whole store (skip hidden + `log/`), returning store-relative paths. A
690 /// scoped read walks `<root>/<layer>/`; the store-wide read enumerates the
691 /// two canonical layer subtrees (`sources/`, `records/`) — the
692 /// same store model [`Store::walk`] uses — rather than walking from
693 /// `self.root`. Walking from root would descend into non-layer top-level
694 /// dirs (`EXPECTED/` test goldens, an `archive/` of frozen index copies,
695 /// any sibling folder holding store-relative `path`s), pulling their
696 /// sidecars in and returning every record twice. A non-existent layer
697 /// subtree yields no sidecars rather than walking a missing path.
698 fn find_type_index_files_in(&self, layer: Option<Layer>) -> Result<Vec<PathBuf>, StoreError> {
699 // Store-wide read: union the per-layer scoped reads so only the three
700 // content layers are walked (never root meta files or non-layer dirs),
701 // matching `Store::walk`. The per-layer paths are disjoint by folder, so
702 // a plain concatenation preserves completeness.
703 let Some(layer) = layer else {
704 let mut out = Vec::new();
705 for l in Layer::all() {
706 out.extend(self.find_type_index_files_in(Some(l))?);
707 }
708 out.sort();
709 return Ok(out);
710 };
711 let walk_root = self.root.join(layer.dir_name());
712 // A scoped walk over a layer folder that does not exist yet must be an
713 // empty result, mirroring `walk_layer`'s missing-dir guard — not a walk
714 // error from `ignore` over a nonexistent path.
715 if !walk_root.is_dir() {
716 return Ok(Vec::new());
717 }
718 let mut out = Vec::new();
719 let mut builder = WalkBuilder::new(&walk_root);
720 builder
721 .standard_filters(false)
722 .hidden(true)
723 .follow_links(true);
724 for entry in builder.build() {
725 let entry = entry.map_err(|e| StoreError::Search {
726 root: walk_root.clone(),
727 message: e.to_string(),
728 })?;
729 if !is_file_entry(&entry) {
730 continue;
731 }
732 let path = entry.path();
733 if path.file_name().and_then(|n| n.to_str()) != Some(TYPE_INDEX_FILE) {
734 continue;
735 }
736 if self.is_in_log_dir(path) {
737 continue;
738 }
739 if let Some(rel) = self.rel_path(path) {
740 out.push(rel);
741 }
742 }
743 out.sort();
744 Ok(out)
745 }
746
747 /// A `WalkBuilder` configured for db.md SWEEPs: gitignore/global-ignore are
748 /// OFF (a SWEEP must see every file even if the store is a git repo with a
749 /// `.gitignore`), but hidden files/dirs are skipped. Symlinks are
750 /// **followed** (`follow_links(true)`) so a symlinked `.md` content file or
751 /// a symlinked type folder (e.g. `records/companies -> /other/disk/...`) is
752 /// walked like any other content rather than silently vanishing; a symlinked
753 /// layer dir was already traversed (the walk root is followed), so following
754 /// symlinks one level deeper just removes that inconsistency.
755 fn md_walker(&self, root: &Path) -> WalkBuilder {
756 let mut builder = WalkBuilder::new(root);
757 builder
758 .standard_filters(false)
759 .hidden(true)
760 .follow_links(true);
761 builder
762 }
763
764 /// True if an absolute path lives under the store's root-level `log/`
765 /// rotation-archive directory.
766 fn is_in_log_dir(&self, abs: &Path) -> bool {
767 match self.rel_path(abs) {
768 Some(rel) => rel.components().next().map(|c| c.as_os_str()) == Some("log".as_ref()),
769 None => false,
770 }
771 }
772
773 /// Read a file's frontmatter `updated` field as an RFC3339 timestamp,
774 /// returning `None` when absent/unparseable. A self-contained reader (does
775 /// not depend on the not-yet-implemented `parser::read_file`); parses the
776 /// leading `---`-fenced YAML block with the same engine the parser uses.
777 fn read_updated(&self, abs: &Path) -> Option<DateTime<FixedOffset>> {
778 let text = std::fs::read_to_string(abs).ok()?;
779 let yaml = frontmatter_block(&text)?;
780 let value: serde_norway::Value = serde_norway::from_str(yaml).ok()?;
781 let raw = value.get("updated")?;
782 value_to_datetime(raw)
783 }
784
785 /// The `<YYYY>/<MM>` shard segment for a sharding type, from its primary
786 /// date field with a `created` fallback. Reads the public `Frontmatter`
787 /// fields directly. `None` when no usable date is present.
788 fn primary_shard_segment(&self, type_: &str, fm: &Frontmatter) -> Option<(String, String)> {
789 // Try the type's primary date field first.
790 if let Some(field) = primary_date_field(type_) {
791 if let Some(v) = fm.extra.get(field) {
792 if let Some(seg) = value_to_year_month(v) {
793 return Some(seg);
794 }
795 }
796 }
797 // Universal fallback: the typed `created` timestamp.
798 fm.created
799 .map(|dt| (format!("{:04}", dt.year()), format!("{:02}", dt.month())))
800 }
801}
802
803// ── Path containment (security) ─────────────────────────────────────────────
804
/// Canonicalize `candidate` (resolving symlinks; for a not-yet-existing leaf,
/// canonicalize its existing parent chain and re-append the leaf) and return it
/// only if it resolves inside `store_root`; otherwise `Err`.
///
/// This is the single within-store containment gate. A wiki-link target, a
/// rename destination, or any other caller-influenced path must pass through
/// here before it is read or traversed, so a `..`-laden or symlink-escaping
/// target can never turn a store operation into an access to an arbitrary file
/// outside the store. `store_root` itself is canonicalized first so the
/// `starts_with` comparison is symlink-stable on both sides (e.g. macOS's
/// `/tmp` → `/private/tmp`).
///
/// # Errors
/// * `PermissionDenied` — the caller-influenced tail contains a `..`
///   component, a component is a symlink that cannot be resolved (dangling or
///   looping), or the resolved path lies outside the store root.
/// * Whatever `store_root.canonicalize()` returns when the root itself cannot
///   be resolved (e.g. it does not exist).
pub fn ensure_path_within_store(store_root: &Path, candidate: &Path) -> std::io::Result<PathBuf> {
    // The `..` rejection must apply only to the *caller-influenced* tail of the
    // candidate — never to a `..` the trusted `store_root` itself carries
    // (`--dir ../../some/store` legitimately seeds every `store_root.join(rel)`
    // candidate with leading `..`). Strip the trusted prefix lexically and
    // scrutinize only the remainder. A candidate that does NOT begin with
    // `store_root` has no trusted prefix, so the whole path is scrutinized.
    let scrutinized = candidate.strip_prefix(store_root).unwrap_or(candidate);

    // A `ParentDir` can never be resolved safely by lexical normalization: once
    // a symlink sits earlier in the path, `foo/../bar` does NOT equal `bar`,
    // and canonicalizing the existing prefix would silently collapse
    // `records/contacts/../../outside` into something that *appears* contained.
    // No legitimate in-store caller needs `..` in the tail.
    if scrutinized
        .components()
        .any(|c| matches!(c, std::path::Component::ParentDir))
    {
        return Err(std::io::Error::new(
            std::io::ErrorKind::PermissionDenied,
            format!(
                "path {} contains a `..` component beyond the store root {} and cannot be contained",
                candidate.display(),
                store_root.display()
            ),
        ));
    }

    // Canonicalize the root so both sides of the containment check live in the
    // same fully-resolved namespace. This also resolves any `..` the root
    // itself carries, which the tail-only check above left in place.
    let root = store_root.canonicalize()?;

    // Resolve the candidate as far as it exists on disk. `canonicalize` fails
    // on a not-yet-existing leaf, so peel trailing components until the
    // remaining prefix resolves, then re-append the peeled (virtual) tail.
    let mut existing = candidate.to_path_buf();
    let mut tail: Vec<std::ffi::OsString> = Vec::new();
    let resolved_prefix = loop {
        if let Ok(resolved) = existing.canonicalize() {
            break resolved;
        }

        // `canonicalize` failing does not by itself mean "does not exist yet".
        // If the entry is present on disk as a symlink, it is a dangling or
        // looping link. Peeling it as a virtual leaf would re-append the link
        // NAME under a contained parent and pass the check, while any later
        // write through it would follow the link to wherever it points —
        // possibly outside the store. Its destination cannot be verified, so
        // it cannot be contained.
        let unresolvable_symlink = existing
            .symlink_metadata()
            .map(|meta| meta.file_type().is_symlink())
            .unwrap_or(false);
        if unresolvable_symlink {
            return Err(std::io::Error::new(
                std::io::ErrorKind::PermissionDenied,
                format!(
                    "path {} traverses an unresolvable symlink at {} and cannot be contained within the store root {}",
                    candidate.display(),
                    existing.display(),
                    store_root.display()
                ),
            ));
        }

        match existing.file_name() {
            Some(name) => {
                tail.push(name.to_os_string());
                if !existing.pop() {
                    // Ran out of components without finding an existing
                    // prefix: anchor the unresolvable remainder at the
                    // canonical root so a relative candidate is judged against
                    // the store, not the process CWD.
                    break root.clone();
                }
            }
            // A root/prefix (or empty) path with no file name and no on-disk
            // existence: anchor at the canonical root as well.
            None => break root.clone(),
        }
    };

    // Reassemble: canonical existing prefix + the peeled tail, restored to its
    // original order (the peel pushed components last-first).
    let mut resolved = resolved_prefix;
    for name in tail.into_iter().rev() {
        resolved.push(name);
    }

    if resolved.starts_with(&root) {
        Ok(resolved)
    } else {
        Err(std::io::Error::new(
            std::io::ErrorKind::PermissionDenied,
            format!(
                "path {} resolves outside the store root {}",
                candidate.display(),
                store_root.display()
            ),
        ))
    }
}
913
914// ── The shared wiki-link edge notion (graph / stats / validate / rename) ─────
915//
916// One definition of "what `[[...]]` text is a real edge" that every relationship
917// op keys on, so `forwardlinks`, `backlinks`, `links`, `stats`, and `rename`
918// never disagree with each other (or with `validate`'s body extractor):
919//
920// 1. **Fence-aware.** A `[[...]]` inside a ``` / ~~~ fenced code block is a
921// documentation example, not an edge — exactly `validate`'s rule. Counting
922// it as an edge over-reports backlinks, falsely un-orphans the page, and
923// (worst) lets `rename` rewrite verbatim example text.
924// 2. **Whitespace-trimmed.** `[[ records/contacts/sarah ]]` is the same edge
925// as `[[records/contacts/sarah]]`. The inner padding is cosmetic; both the
926// forward and the backward view must resolve it identically.
927// 3. **Case-folded to the filesystem.** Link *resolution* is `is_file()`,
928// which is case-insensitive on macOS/Windows. So on a case-insensitive
929// filesystem `[[records/contacts/Sarah-Chen]]` and the on-disk
930// `sarah-chen.md` are the SAME edge; the comparison key must case-fold to
931// match, or backlinks/rename silently miss the link while validate (which
932// resolves via the filesystem) considers it fine.
933
/// Canonicalize a raw `[[...]]` inner target into the wiki-link key: forward
/// slashes, no leading `./` or `/`, no trailing `.md`, surrounding whitespace
/// trimmed. The single key forward and backward edges are compared on. Pairs
/// with [`link_edge_key`] for the case-fold step.
///
/// Leading `./` and `/` prefixes are stripped repeatedly until neither
/// remains, so interleaved forms such as `/./records/a` or `.//records/a`
/// canonicalize to the same key as `records/a`.
pub fn canonical_link_target(raw: &str) -> String {
    // One allocation for the separator normalization; everything after this is
    // slicing into it.
    let normalized = raw.trim().replace('\\', "/");
    let mut target: &str = &normalized;

    // Strip `./` and `/` prefixes to a fixpoint. Stripping all `./` first and
    // all `/` afterwards would leave a `./` behind on `/./a`.
    loop {
        if let Some(rest) = target.strip_prefix("./") {
            target = rest;
        } else if let Some(rest) = target.strip_prefix('/') {
            target = rest;
        } else {
            break;
        }
    }

    let target = target.strip_suffix(".md").unwrap_or(target);
    // Trim again: removing the prefix/suffix can expose inner padding
    // (e.g. `name .md`).
    target.trim().to_string()
}
947
948/// The comparison key for a canonical link target. Two normalizations, applied
949/// in order, so the string-keyed edge comparison agrees with how the filesystem
950/// resolves the same link:
951///
952/// 1. **Unicode NFC, always.** macOS/APFS folds NFC and NFD forms of a name to
953/// the same file, so a file `records/contacts/josé.md` written NFC
954/// (`é` = U+00E9) and a link `[[records/contacts/josé]]` written NFD
955/// (`e` + U+0301) name the *same* file on disk — yet their raw UTF-8 bytes
956/// differ. Without normalization the graph keys them as two different
957/// targets, so `backlinks`/`forwardlinks` miss the edge and `orphans` flags
958/// a linked-to file as an orphan, while `validate` (which resolves through
959/// the filesystem) sees the link as live: the surfaces silently disagree.
960/// Normalizing BOTH sides to NFC here makes the comparison
961/// normalization-insensitive, matching the filesystem. This lives in the
962/// comparison key — not in [`canonical_link_target`] — so the canonical
963/// form stays byte/normalization-preserving (rename REWRITE output is never
964/// silently re-normalized); both the link target and the file path pass
965/// through this function, so NFC here is sufficient to unify them.
966/// 2. **ASCII case-fold on a case-insensitive filesystem.** Identity on a
967/// case-sensitive FS, ASCII-lowercased on macOS/Windows, so the comparison
968/// also agrees with the filesystem's case-folding `is_file()` resolution.
969///
970/// Callers compare `link_edge_key(a) == link_edge_key(b)`.
971pub fn link_edge_key(canonical_target: &str) -> String {
972 use unicode_normalization::UnicodeNormalization;
973 // NFC first — always, on every platform: the graph must agree across hosts,
974 // and the comparison must be normalization-insensitive regardless of which
975 // host's filesystem folded the on-disk name.
976 let nfc: String = canonical_target.nfc().collect();
977 if fs_is_case_insensitive() {
978 nfc.to_ascii_lowercase()
979 } else {
980 nfc
981 }
982}
983
984/// Extract every wiki-link edge target from a markdown body, fence-aware and
985/// whitespace-trimmed, in document order (duplicates kept — callers dedup).
986/// Returns canonical targets (see [`canonical_link_target`]); the case-fold for
987/// comparison is applied separately via [`link_edge_key`] so the canonical form
988/// (used for rewrites/output) stays case-preserving.
989///
990/// Scans line-by-line tracking the fence state inline (no whole-body
991/// allocation), exactly mirroring validate's `extract_wiki_links`: the fence
992/// state is a `(fence char, run length)` tracked via [`fence_opens`] /
993/// [`fence_closes`] — NOT a bool toggled on any ``` / `~~~` line. The naive
994/// toggle inverts mid-block when a `~~~` block legally contains a ```` ``` ````
995/// line (the standard way to document a backtick fence), or when a `>3`-space-
996/// indented ``` is mistaken for a fence — both of which would let a fenced
997/// example `[[…]]` leak out as a live edge (a false dependent for
998/// backlinks/rename). Fenced lines never yield edges. Within a line, the text
999/// before the first `|` is the target; a target whose trimmed form starts with
1000/// `[` is the rejected triple-bracket flow-form list mis-encoding
1001/// (`[[[a]], [[b]]]`), not a real link — skipped, matching validate.
1002///
1003/// Accepts a whole file's text *or* a body-only fragment. A leading `---`
1004/// frontmatter block is YAML, not markdown: it has no code fences, and a
1005/// `[[…]]` in any frontmatter field is a real edge. The frontmatter is therefore
1006/// scanned WITHOUT fence tracking, and the body is scanned with a FRESH fence
1007/// state — so a stray ``` / `~~~` inside a frontmatter value can never open a
1008/// fence that swallows the body's real wiki-links. (Callers `search_by_link`,
1009/// `forwardlinks`, and `dbmd links` all pass full file text; without this
1010/// boundary reset a fenced frontmatter value silently dropped every subsequent
1011/// body edge — under-reporting backlinks/forwardlinks/`links`.) A fragment with
1012/// no leading frontmatter takes the body path unchanged.
1013pub fn extract_edge_targets(text: &str) -> Vec<String> {
1014 let mut out = Vec::new();
1015 // Split off a leading `---`…`---` frontmatter block (raw — no YAML parse, so
1016 // a malformed file is still fully scanned). Frontmatter links are edges but
1017 // must not participate in code-fence state.
1018 let body = match split_frontmatter_raw(text) {
1019 Some((frontmatter, body)) => {
1020 for line in frontmatter.lines() {
1021 push_edges_in_line(line, &mut out);
1022 }
1023 body
1024 }
1025 None => text,
1026 };
1027 let mut fence: Option<(u8, usize)> = None;
1028 for line in body.lines() {
1029 let content = line.trim_end_matches('\r');
1030 if let Some(f) = fence {
1031 if fence_closes(content, f) {
1032 fence = None;
1033 }
1034 continue;
1035 }
1036 if let Some(opened) = fence_opens(content) {
1037 fence = Some(opened);
1038 continue;
1039 }
1040 push_edges_in_line(line, &mut out);
1041 }
1042 out
1043}
1044
1045/// Push every `[[target]]` on one line into `out`, alias-stripped (`[[a|b]]` →
1046/// `a`), trimmed, and canonicalized. The triple-bracket flow-form mis-encoding
1047/// (`[[[a]], …]`) is skipped, matching validate. Shared by both the frontmatter
1048/// and body scans in [`extract_edge_targets`] so they honor one link grammar.
1049fn push_edges_in_line(line: &str, out: &mut Vec<String>) {
1050 let bytes = line.as_bytes();
1051 let mut i = 0usize;
1052 while i + 1 < bytes.len() {
1053 if bytes[i] == b'[' && bytes[i + 1] == b'[' {
1054 if let Some(close) = line[i + 2..].find("]]") {
1055 let inner = &line[i + 2..i + 2 + close];
1056 let raw_target = inner.split('|').next().unwrap_or(inner).trim();
1057 if !raw_target.is_empty() && !raw_target.starts_with('[') {
1058 let canonical = canonical_link_target(raw_target);
1059 if !canonical.is_empty() {
1060 out.push(canonical);
1061 }
1062 }
1063 i = i + 2 + close + 2;
1064 continue;
1065 }
1066 }
1067 i += 1;
1068 }
1069}
1070
/// Decide whether `line` opens a fenced code block; on success return the
/// fence as `(fence byte, run length)`.
///
/// A line opens a fence when, after at most three leading SPACES, it starts
/// with a run of three or more backticks or three or more tildes. For a
/// backtick fence the text after the run (the info string) must not itself
/// contain a backtick. Anything else returns `None`.
///
/// This is the single fence-open rule shared by [`extract_edge_targets`] and
/// the other link operations, so they all track fences identically.
pub fn fence_opens(line: &str) -> Option<(u8, usize)> {
    let unindented = line.trim_start_matches(' ');
    if line.len() - unindented.len() > 3 {
        return None;
    }

    // The fence character is whichever of '`' / '~' the line starts with.
    let marker = unindented
        .bytes()
        .next()
        .filter(|&b| matches!(b, b'`' | b'~'))?;

    let info = unindented.trim_start_matches(char::from(marker));
    let run = unindented.len() - info.len();
    if run < 3 || (marker == b'`' && info.contains('`')) {
        return None;
    }
    Some((marker, run))
}
1097
/// Decide whether `line` closes the currently open `fence`
/// (`(fence byte, opening run length)`).
///
/// The line closes the fence when, after at most three leading SPACES, it
/// starts with a run of the SAME fence character at least as long as the
/// opening run, followed by nothing but whitespace. A run of the *other* fence
/// character (a ```` ``` ```` line inside a `~~~` block) therefore does not
/// close the outer fence.
pub fn fence_closes(line: &str, fence: (u8, usize)) -> bool {
    let (marker, opening_run) = fence;
    let marker = char::from(marker);

    let unindented = line.trim_start_matches(' ');
    if line.len() - unindented.len() > 3 {
        return false;
    }

    let after_run = unindented.trim_start_matches(marker);
    let run = unindented.len() - after_run.len();
    run >= opening_run && after_run.trim().is_empty()
}
1116
/// Report whether the host filesystem resolves paths case-insensitively.
///
/// The answer is probed once per process and cached. The probe creates a
/// lowercase marker file in the OS temp dir (named with the process id and a
/// nanosecond timestamp) and checks whether the UPPERCASE spelling of that
/// name resolves to a file. The marker is removed afterwards on a best-effort
/// basis. If the marker cannot be created the probe conservatively answers
/// `false` (case-sensitive), so a temp-dir problem never widens matching.
fn fs_is_case_insensitive() -> bool {
    use std::sync::OnceLock;
    static PROBE_RESULT: OnceLock<bool> = OnceLock::new();

    fn probe() -> bool {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map_or(0, |elapsed| elapsed.as_nanos());
        let pid = std::process::id();
        let tmp = std::env::temp_dir();
        let lower = tmp.join(format!(".dbmd-case-probe-{pid}-{nanos}"));
        let upper = tmp.join(format!(".DBMD-CASE-PROBE-{pid}-{nanos}"));

        // The uppercase name is only consulted when the lowercase marker was
        // actually created; if it then resolves, the filesystem folded case.
        let folded = std::fs::File::create(&lower).is_ok() && upper.is_file();
        let _ = std::fs::remove_file(&lower);
        folded
    }

    *PROBE_RESULT.get_or_init(probe)
}
1144
1145// ── Free helpers (no `self`) ────────────────────────────────────────────────
1146
1147/// True if a walk entry is a regular file, **following symlinks** so a
1148/// symlinked `.md` content file (or a file inside a symlinked type folder) is
1149/// counted like any other content file.
1150///
1151/// The store walks enable `follow_links(true)`, so a symlink entry's
1152/// `file_type()` still reports `is_symlink()` (the `ignore` walker does not
1153/// rewrite the entry's own type), not the followed target's type. Treat a
1154/// symlink whose target is a regular file as a file: `stat` (follow) the path
1155/// and check. A broken symlink (no target) is not a file.
1156fn is_file_entry(entry: &ignore::DirEntry) -> bool {
1157 match entry.file_type() {
1158 Some(ft) if ft.is_file() => true,
1159 Some(ft) if ft.is_symlink() => std::fs::metadata(entry.path())
1160 .map(|m| m.is_file())
1161 .unwrap_or(false),
1162 // A `None` file type (the walk root itself) or a non-file/non-symlink
1163 // entry is not a content file.
1164 _ => false,
1165 }
1166}
1167
/// Report whether `path`'s extension is exactly `md`. The comparison is
/// case-sensitive (`.MD` does not match), and a path with no extension is
/// `false`.
fn has_md_extension(path: &Path) -> bool {
    path.extension() == Some(std::ffi::OsStr::new("md"))
}
1173
1174/// True if the basename is a non-content meta file (`DB.md`, `index.md`,
1175/// `log.md`) that the content walks must skip.
1176fn is_non_content_basename(path: &Path) -> bool {
1177 match path.file_name().and_then(|n| n.to_str()) {
1178 Some(name) => NON_CONTENT_BASENAMES.contains(&name),
1179 None => false,
1180 }
1181}
1182
/// Return `name` with a `.md` suffix: appended when missing, left untouched
/// when `name` already ends in `.md` (case-sensitive).
fn ensure_md_extension(name: &str) -> String {
    let mut with_ext = String::from(name);
    if !name.ends_with(".md") {
        with_ext.push_str(".md");
    }
    with_ext
}
1191
/// Map a `type_` to its canonical default folder (`email → sources/emails`,
/// `expense → records/expenses`, …).
///
/// A type that is not in the table falls back to `records/<type>` using the
/// bare type name verbatim — no pluralization guess — so conclusions and any
/// custom type land there (e.g. `concept` → `records/concept`).
fn default_type_folder(type_: &str) -> PathBuf {
    // (type, folder) pairs for the recognized types.
    const KNOWN_FOLDERS: &[(&str, &str)] = &[
        // sources — documentary
        ("email", "sources/emails"),
        ("transcript", "sources/transcripts"),
        ("pdf-source", "sources/docs"),
        // sources — testimonial (a human told the agent X)
        ("note", "sources/notes"),
        // records — entities
        ("contact", "records/contacts"),
        ("company", "records/companies"),
        // records — events
        ("expense", "records/expenses"),
        ("meeting", "records/meetings"),
        ("decision", "records/decisions"),
        ("invoice", "records/invoices"),
    ];

    KNOWN_FOLDERS
        .iter()
        .find(|(known, _)| *known == type_)
        .map(|(_, folder)| PathBuf::from(folder))
        .unwrap_or_else(|| Path::new("records").join(type_))
}
1218
1219/// The canonical [`Layer`] a `type_` belongs to, derived from its default
1220/// type-folder (`email` → `Sources`, `contact` → `Records`, a conclusion
1221/// `profile` → `Records`, unrecognized → `Records`). The write path uses this to decide whether
1222/// an agent-supplied folder is in the *right* layer for the type before honouring
1223/// its sub-folder choice.
1224pub fn layer_for_type(type_: &str) -> Layer {
1225 layer_of_folder(&default_type_folder(type_)).unwrap_or(Layer::Records)
1226}
1227
1228/// The [`Layer`] a type-folder path lives in, read from its first component
1229/// (`sources/` → `Sources`, `records/` → `Records`). Used to
1230/// bound [`Store::find_by_type`]'s whole-layer sidecar read to a single layer
1231/// subtree. Returns `None` for a path with no recognized layer prefix; every
1232/// value [`default_type_folder`] produces has one, so in practice this is
1233/// always `Some` on the call path — `None` degrades to a store-wide read.
1234fn layer_of_folder(folder: &Path) -> Option<Layer> {
1235 let first = folder.components().next()?.as_os_str().to_str()?;
1236 Layer::from_dir_name(first)
1237}
1238
1239/// True if a store-relative path is a db.md **content** file: rooted in a real
1240/// layer (`sources/` or `records/` as its FIRST component), with a `.md`
1241/// extension, and not an `index.md` sidecar. This is the SPEC's "content files =
1242/// everything under `sources/` and `records/` only" predicate (SPEC § content
1243/// files), keyed on the *first* component so a non-layer top-level dir is never
1244/// content even if a deeper component happens to be named `records`/`sources`
1245/// (e.g. `EXPECTED/records/x.md`, `archive/sources/y.md`).
1246///
1247/// It mirrors the graph engine's content filter so the surfaces that READ the
1248/// store (`graph backlinks`) and the surface that MUTATES it (`rename`) agree on
1249/// exactly which files are content. `rename` uses it to restrict its
1250/// link-rewrite set: a store-root file, a non-layer dir (`scratch/`,
1251/// `EXPECTED/`, `archive/`), or an `index.md` is NEVER rewritten — `rename` does
1252/// not own those bytes. The broad store scan ([`Store::find_links_to_any`],
1253/// shared with the read-only working-set validate) is left untouched; the filter
1254/// is applied at the point of mutation.
1255pub fn is_content_path(rel: &Path) -> bool {
1256 if layer_of_folder(rel).is_none() {
1257 return false;
1258 }
1259 if rel.extension().and_then(|e| e.to_str()) != Some("md") {
1260 return false;
1261 }
1262 rel.file_name().and_then(|n| n.to_str()) != Some("index.md")
1263}
1264
/// Infer a content file's canonical `type` from its store-relative path — the
/// inverse of [`default_type_folder`].
///
/// The path must have at least the `<layer>/<type-folder>/<file>` shape, with
/// `<layer>` being `sources` or `records`. A shorter path (a file directly
/// under a layer) or any other leading component yields `None`. Components
/// beyond the second do not influence the result.
///
/// A recognized `(layer, folder)` pair maps back to its canonical type. Any
/// other folder yields the **bare folder name verbatim** (no pluralization or
/// singularization), mirroring `default_type_folder`'s bare-name fallback
/// (`task` ⇄ `records/task`).
pub fn infer_type_from_path(rel: &Path) -> Option<String> {
    let mut names = rel
        .components()
        .filter_map(|component| component.as_os_str().to_str());

    // The leading component selects the per-layer (folder, type) table.
    let known: &[(&str, &str)] = match names.next()? {
        "sources" => &[
            ("emails", "email"),
            ("transcripts", "transcript"),
            ("docs", "pdf-source"),
            ("notes", "note"),
        ],
        "records" => &[
            ("contacts", "contact"),
            ("companies", "company"),
            ("expenses", "expense"),
            ("meetings", "meeting"),
            ("decisions", "decision"),
            ("invoices", "invoice"),
        ],
        _ => return None,
    };

    let folder = names.next()?;
    // A third component must exist: the file has to sit inside a type folder,
    // not directly under the layer.
    names.next()?;

    let inferred = known
        .iter()
        .find(|(dir, _)| *dir == folder)
        .map_or(folder, |(_, type_name)| *type_name);
    Some(inferred.to_owned())
}
1310
/// Name the frontmatter field whose value drives the `<YYYY>/<MM>` shard for
/// `type_`. `None` means the type has no such field here and sharding relies
/// on the `created` fallback only.
fn primary_date_field(type_: &str) -> Option<&'static str> {
    let field = match type_ {
        "email" | "expense" | "invoice" | "meeting" => "date",
        "transcript" => "recorded_at",
        "pdf-source" => "received_at",
        "note" => "told_at",
        // Every other type has no canonical date field name.
        _ => return None,
    };
    Some(field)
}
1325
1326/// Parse a YAML value into an RFC3339 [`DateTime`], accepting both an explicit
1327/// string and a YAML-native scalar rendered to string.
1328fn value_to_datetime(value: &serde_norway::Value) -> Option<DateTime<FixedOffset>> {
1329 let s = yaml_scalar_string(value)?;
1330 DateTime::parse_from_rfc3339(s.trim()).ok()
1331}
1332
1333/// Extract `(YYYY, MM)` from a YAML date/timestamp value. Lenient: matches a
1334/// leading `YYYY-MM` so a bare `2026-05-22` date and a full
1335/// `2026-05-22T10:00:00-07:00` timestamp both work.
1336fn value_to_year_month(value: &serde_norway::Value) -> Option<(String, String)> {
1337 let s = yaml_scalar_string(value)?;
1338 year_month_from_str(s.trim())
1339}
1340
/// Parse the leading `YYYY-M` / `YYYY-MM` of a date string into
/// `(year, month)`, with the month zero-padded to two digits on output.
///
/// Accepting a single-digit month means `2026-1-15` and `2026-01-15` land in
/// the same `2026/01` shard. Requirements:
///
/// * the text before the first `-` is exactly four ASCII digits;
/// * the text between the first `-` and the next `-` (or the end of input) is
///   one or two ASCII digits with a value in `1..=12`.
///
/// Whatever follows a second `-` (a day, a time) is ignored. Anything that
/// does not meet these requirements returns `None`.
fn year_month_from_str(s: &str) -> Option<(String, String)> {
    // Hand-rolled instead of a regex to keep a regex compile off the write path.
    let all_digits = |text: &str| text.bytes().all(|b| b.is_ascii_digit());

    let (year, after_year) = s.split_once('-')?;
    let month_text = after_year.split('-').next().unwrap_or(after_year);

    let year_ok = year.len() == 4 && all_digits(year);
    let month_shape_ok = (1..=2).contains(&month_text.len()) && all_digits(month_text);
    if !(year_ok && month_shape_ok) {
        return None;
    }

    let month = month_text.parse::<u8>().ok()?;
    if month == 0 || month > 12 {
        return None;
    }
    Some((year.to_string(), format!("{month:02}")))
}
1378
1379/// Render a YAML scalar as a string: a real `String` verbatim, otherwise the
1380/// value's compact YAML serialization (covers timestamps that the YAML engine
1381/// may surface as a non-string scalar).
1382fn yaml_scalar_string(value: &serde_norway::Value) -> Option<String> {
1383 if let Some(s) = value.as_str() {
1384 return Some(s.to_string());
1385 }
1386 match value {
1387 serde_norway::Value::Null => None,
1388 serde_norway::Value::Mapping(_) | serde_norway::Value::Sequence(_) => None,
1389 other => serde_norway::to_string(other)
1390 .ok()
1391 .map(|s| s.trim().to_string()),
1392 }
1393}
1394
1395/// The YAML frontmatter block of a file: the text between a leading `---` fence
1396/// and the next `---` fence, exclusive. `None` if the file does not open with a
1397/// `---` fence on its first line.
1398fn frontmatter_block(text: &str) -> Option<&str> {
1399 // Tolerate a UTF-8 BOM and CRLF, but the fence must be the very first line.
1400 let body = text.strip_prefix('\u{feff}').unwrap_or(text);
1401 let mut rest = body;
1402 // First line must be exactly `---`, tolerating trailing whitespace (CR, but
1403 // also spaces/tabs) — matching the canonical parser (`parser.rs` /
1404 // `index.rs`'s `extract_frontmatter_block`). A strict `\r`-only trim missed a
1405 // `--- ` fence, so `read_updated` returned None and date-sharding silently
1406 // fell back, disagreeing with the sidecar the rest of the toolkit builds.
1407 let (first, after_first) = split_first_line(rest);
1408 if first.trim_end() != "---" {
1409 return None;
1410 }
1411 rest = after_first;
1412 let block_start = rest;
1413 let mut scanned = 0usize;
1414 loop {
1415 let (line, after) = split_first_line(rest);
1416 if line.trim_end() == "---" {
1417 return Some(&block_start[..scanned]);
1418 }
1419 if after.is_empty() && line.is_empty() {
1420 // Reached end of input without a closing fence.
1421 return None;
1422 }
1423 scanned += line.len() + 1; // +1 for the consumed '\n'
1424 if after.is_empty() {
1425 return None;
1426 }
1427 rest = after;
1428 }
1429}
1430
1431/// Split a file's text into `(frontmatter, body)` at the leading `---`…`---`
1432/// fence — raw (no YAML parse), so a file with malformed frontmatter is still
1433/// split and fully scanned. `frontmatter` is the text between the fences
1434/// (exclusive); `body` is everything after the closing fence's line. Returns
1435/// `None` when the text does not open with a `---` fence or has no closing
1436/// fence — the caller then treats the whole text as body. Mirrors
1437/// [`frontmatter_block`]'s boundary detection (BOM- and CRLF-tolerant).
1438fn split_frontmatter_raw(text: &str) -> Option<(&str, &str)> {
1439 let stripped = text.strip_prefix('\u{feff}').unwrap_or(text);
1440 let (first, after_first) = split_first_line(stripped);
1441 if first.trim_end() != "---" {
1442 return None;
1443 }
1444 let block_start = after_first;
1445 let mut scanned = 0usize;
1446 let mut rest = after_first;
1447 loop {
1448 let (line, after) = split_first_line(rest);
1449 if line.trim_end() == "---" {
1450 // `after` is the body: everything past the closing fence line.
1451 return Some((&block_start[..scanned], after));
1452 }
1453 if after.is_empty() && line.is_empty() {
1454 return None; // reached EOF with no closing fence
1455 }
1456 scanned += line.len() + 1; // +1 for the consumed '\n'
1457 if after.is_empty() {
1458 return None; // closing fence never found
1459 }
1460 rest = after;
1461 }
1462}
1463
/// Break `s` at its first `\n`, returning `(line, remainder)`.
///
/// The newline itself belongs to neither half. When `s` contains no newline
/// the whole input is the line and the remainder is the empty string. A `\r`
/// preceding the `\n` is left on the line.
fn split_first_line(s: &str) -> (&str, &str) {
    s.split_once('\n').unwrap_or((s, ""))
}
1473
1474/// True if an [`IndexRecord`] has a field `key` equal to `value`, checking the
1475/// typed columns first and then the flattened `fields` map.
1476fn record_matches_field(record: &IndexRecord, key: &str, value: &str) -> bool {
1477 match key {
1478 "type" => record.type_ == value,
1479 "summary" => record.summary == value,
1480 "path" => record.path.to_string_lossy() == value,
1481 "created" => timestamp_matches(record.created, value),
1482 "updated" => timestamp_matches(record.updated, value),
1483 "tags" => record.tags.iter().any(|t| t == value),
1484 "links" => record.links.iter().any(|l| l == value),
1485 other => record
1486 .fields
1487 .get(other)
1488 .map(|v| json_value_matches(v, value))
1489 .unwrap_or(false),
1490 }
1491}
1492
1493/// Compare a record's `created`/`updated` instant against a query `value`.
1494///
1495/// db.md files write timestamps in several equivalent RFC3339 spellings — most
1496/// commonly the `Z` UTC designator (`2026-05-01T00:00:00Z`) but also an explicit
1497/// offset (`...+00:00`, `...-07:00`). A naive `record.created.to_rfc3339() ==
1498/// value` reformats only one side: chrono renders a UTC instant as `+00:00`, so
1499/// the `Z` form an agent reads straight out of the file would never match. We
1500/// instead parse `value` as RFC3339 and compare instants, where `Z` and `+00:00`
1501/// (and any same-instant offset) are equal. A `value` that is not valid RFC3339
1502/// can never equal a real timestamp, so it falls through to `false`.
1503fn timestamp_matches(stored: Option<DateTime<FixedOffset>>, value: &str) -> bool {
1504 match (stored, DateTime::parse_from_rfc3339(value)) {
1505 (Some(stored), Ok(queried)) => stored == queried,
1506 _ => false,
1507 }
1508}
1509
1510/// Match a JSON number against a query string.
1511///
1512/// A FLOAT-valued field is compared NUMERICALLY, not textually: the sidecar
1513/// stores a YAML float through serde_json's canonical f64 rendering, which
1514/// discards the file's source spelling (`1234.00` -> `1234.0`, `12.50` ->
1515/// `12.5`, `1e3` -> `1000.0`). A raw `to_string()` compare therefore made the
1516/// spelling a human reads in the file fail to match (and disagreed with
1517/// free-text `search`), while requiring a canonical form often absent from the
1518/// file. We parse the query as f64 and compare values. Restricted to the float
1519/// case so a large INTEGER field never loses exactness to f64 rounding (integers
1520/// render canonically and round-trip exactly through the textual compare).
1521/// Mirrors the parse-then-compare pattern [`timestamp_matches`] already uses.
1522fn number_matches(n: &serde_json::Number, value: &str) -> bool {
1523 if n.to_string() == value {
1524 return true;
1525 }
1526 if n.is_f64() {
1527 if let (Some(stored), Ok(q)) = (n.as_f64(), value.parse::<f64>()) {
1528 return stored == q;
1529 }
1530 }
1531 false
1532}
1533
1534/// Compare a JSON field value against a query string. A string matches
1535/// verbatim; scalars match their textual form; an array matches if any element
1536/// matches (so a list-valued frontmatter field is membership-queried).
1537fn json_value_matches(v: &serde_json::Value, value: &str) -> bool {
1538 match v {
1539 serde_json::Value::String(s) => s == value,
1540 serde_json::Value::Bool(b) => b.to_string() == value,
1541 serde_json::Value::Number(n) => number_matches(n, value),
1542 serde_json::Value::Array(items) => items.iter().any(|i| json_value_matches(i, value)),
1543 // A present-but-null field never matches — consistent with the in-memory
1544 // post-filter (`query::json_value_matches`, which the first `where`
1545 // clause is NOT re-checked against, so the two must agree here or a
1546 // `--where field=` query would return different rows than `--type X
1547 // --where field=`).
1548 serde_json::Value::Null => false,
1549 serde_json::Value::Object(_) => false,
1550 }
1551}
1552
1553#[cfg(test)]
1554mod tests {
1555 use super::*;
1556 use std::fs;
1557 use tempfile::{tempdir, TempDir};
1558
1559 // ── Fixtures ────────────────────────────────────────────────────────────
1560
/// Create the file `<root>/<rel>` holding `contents`, making any missing
/// parent directories first. Hands back `rel` as a `PathBuf` (the
/// store-relative path) so callers can assert against it directly.
fn write(root: &Path, rel: &str, contents: &str) -> PathBuf {
    let target = root.join(rel);
    let parent = target.parent().unwrap();
    fs::create_dir_all(parent).unwrap();
    fs::write(&target, contents).unwrap();
    PathBuf::from(rel)
}
1569
/// Text of a minimal `note` content file whose frontmatter `created` and
/// `updated` fields are both set to `updated`.
fn content_md(updated: &str) -> String {
    [
        "---\ntype: note\ncreated: ",
        updated,
        "\nupdated: ",
        updated,
        "\nsummary: a note\n---\n\nbody\n",
    ]
    .concat()
}
1576
1577 /// A bare directory with a `DB.md` marker (valid `db-md` frontmatter so the
1578 /// real parser is exercised).
1579 fn empty_store() -> TempDir {
1580 let dir = tempdir().unwrap();
1581 fs::write(
1582 dir.path().join("DB.md"),
1583 "---\ntype: db-md\nscope: company\nowner: Test\n---\n\n# Store\n",
1584 )
1585 .unwrap();
1586 dir
1587 }
1588
1589 /// Open a store rooted at a TempDir; panics if `open` rejects it.
1590 fn open(dir: &TempDir) -> Store {
1591 Store::open(dir.path()).expect("fixture should be a valid store")
1592 }
1593
/// Render each path as a string with every `\` replaced by `/`, so assertions
/// can be written with forward slashes.
fn rels(paths: &[PathBuf]) -> Vec<String> {
    let mut rendered = Vec::with_capacity(paths.len());
    for path in paths {
        rendered.push(path.to_string_lossy().replace('\\', "/"));
    }
    rendered
}
1600
1601 // ── Layer ───────────────────────────────────────────────────────────────
1602
#[test]
fn layer_dir_name_and_parse_are_inverse() {
    // Round trip: every layer parses back from its own directory name.
    for layer in Layer::all() {
        assert_eq!(Layer::from_dir_name(layer.dir_name()), Some(layer));
    }

    // The directory names themselves are pinned.
    for (layer, name) in [(Layer::Sources, "sources"), (Layer::Records, "records")] {
        assert_eq!(layer.dir_name(), name);
    }

    // Names that must NOT parse as a layer:
    //   `wiki`    — no longer a layer (the wiki/ layer was removed)
    //   `log`     — not a layer
    //   `Sources` — parsing is case-sensitive
    for not_a_layer in ["wiki", "log", "Sources"] {
        assert_eq!(Layer::from_dir_name(not_a_layer), None);
    }
}
1615
#[test]
fn layer_order_is_canonical() {
    // stats keys a BTreeMap on Layer, so `Ord` must put sources before records.
    let mut layers = vec![Layer::Records, Layer::Sources];
    layers.sort();
    assert_eq!(layers, [Layer::Sources, Layer::Records]);
}
1623
#[test]
fn is_content_path_is_layer_rooted_and_excludes_non_layer_files() {
    // Real content: a `.md` file rooted in a layer's FIRST component.
    let content = ["records/contacts/alice.md", "sources/emails/2026/05/x.md"];
    for rel in content {
        assert!(is_content_path(Path::new(rel)), "{rel} should be content");
    }

    let not_content = [
        // Store-root meta files and a bare top-level note are NOT content.
        "DB.md",
        "log.md",
        "NOTES.md",
        // Non-layer top-level dirs are NEVER content — even if a DEEPER
        // component is named `records`/`sources` (the rename data-loss case).
        "scratch/draft.md",
        "EXPECTED/snapshot.md",
        "archive/old.md",
        "EXPECTED/records/contacts/x.md",
        "archive/sources/emails/y.md",
        // An `index.md` sidecar inside a layer is a catalog, not content.
        "records/contacts/index.md",
        // A non-`.md` file inside a layer (e.g. the jsonl sidecar) is not content.
        "records/contacts/index.jsonl",
    ];
    for rel in not_content {
        assert!(
            !is_content_path(Path::new(rel)),
            "{rel} should not be content"
        );
    }
}
1647
1648 // ── is_db_md_store / open ────────────────────────────────────────────────
1649
#[test]
fn is_store_true_only_with_uppercase_marker() {
    let dir = tempdir().unwrap();
    let root = dir.path();

    // Before the marker exists the directory is not a store…
    assert!(!Store::is_db_md_store(root), "no marker → not a store");

    // …and it becomes one once an uppercase `DB.md` file is written.
    fs::write(root.join("DB.md"), "---\ntype: db-md\n---\n").unwrap();
    assert!(Store::is_db_md_store(root), "uppercase DB.md → store");
}
1661
#[test]
fn is_store_false_for_lowercase_db_md() {
    // The case-sensitivity contract: lowercase `db.md` is the spec's name, not
    // a store marker — and that must hold even on a case-insensitive
    // filesystem (macOS), where `Path::exists` would report the uppercase
    // name as present.
    let dir = tempdir().unwrap();
    let root = dir.path();
    fs::write(root.join("db.md"), "---\ntype: db-md\n---\n").unwrap();

    let treated_as_store = Store::is_db_md_store(root);
    assert!(
        !treated_as_store,
        "lowercase db.md must NOT be treated as a store marker"
    );
    assert!(Store::open(root).is_err());
}
1675
#[test]
fn is_store_false_when_db_md_is_a_directory() {
    let dir = tempdir().unwrap();
    let root = dir.path();
    // Something named `DB.md` exists, but it is a directory rather than a file.
    fs::create_dir(root.join("DB.md")).unwrap();

    let treated_as_store = Store::is_db_md_store(root);
    assert!(
        !treated_as_store,
        "a directory named DB.md is not the file marker"
    );
}
1685
#[test]
fn open_rejects_non_store_with_path() {
    // A bare directory has no marker; the error must name the path that was tried.
    let dir = tempdir().unwrap();
    let root = dir.path();
    let rejection = Store::open(root).unwrap_err();
    assert_eq!(rejection.path, root);
}
1692
#[test]
fn open_succeeds_and_parses_config() {
    // A DB.md whose `## Policies` section declares a frozen page: seeing that
    // page on the opened store proves `open()` parsed the config instead of
    // substituting a default.
    let marker = concat!(
        "---\ntype: db-md\nscope: company\nowner: Test\n---\n\n# Store\n\n",
        "## Policies\n\n### Frozen pages\n- records/decisions/q1.md\n",
    );
    let dir = tempdir().unwrap();
    let root = dir.path();
    fs::write(root.join("DB.md"), marker).unwrap();

    let store = Store::open(root).unwrap();
    assert_eq!(store.root, root);

    let frozen = &store.config.frozen_pages;
    let wanted = Path::new("records/decisions/q1.md");
    assert!(
        frozen.iter().any(|page| page == wanted),
        "open() must surface DB.md ## Policies, got {:?}",
        store.config.frozen_pages
    );
}
1716
1717 // ── walk / walk_layer / walk_type_folder ─────────────────────────────────
1718
#[test]
fn walk_collects_content_across_layers_skipping_meta_and_log() {
    let dir = empty_store();
    let root = dir.path();

    // Real content in both layers.
    for (rel, updated) in [
        ("sources/emails/2026/05/a.md", "2026-05-01T00:00:00Z"),
        ("records/contacts/sarah.md", "2026-05-02T00:00:00Z"),
        ("records/profiles/sarah.md", "2026-05-03T00:00:00Z"),
    ] {
        write(root, rel, &content_md(updated));
    }

    // Things walk() must SKIP:
    //   a type-folder catalog and the root catalog
    for catalog in ["sources/emails/index.md", "index.md"] {
        write(root, catalog, "---\ntype: index\n---\n");
    }
    //   the log and a rotated log archive
    for log in ["log.md", "log/2026-04.md"] {
        write(root, log, "---\ntype: log\n---\n");
    }
    //   a file under a hidden directory
    write(
        root,
        "sources/.hidden/secret.md",
        &content_md("2026-05-09T00:00:00Z"),
    );
    //   a non-markdown file
    write(root, "records/contacts/notes.txt", "not markdown");

    let store = open(&dir);
    let walked = rels(&store.walk().unwrap());
    assert_eq!(
        walked,
        [
            "records/contacts/sarah.md",
            "records/profiles/sarah.md",
            "sources/emails/2026/05/a.md",
        ]
    );
}
1761
#[test]
fn walk_includes_content_named_log_md_or_db_md_inside_a_layer() {
    // `log.md` / `DB.md` are reserved names only at the store root; a file
    // with one of those names INSIDE a layer is ordinary content.
    let layer_internal = [
        ("records/configs/log.md", "2026-05-01T00:00:00Z"),
        ("sources/docs/DB.md", "2026-05-02T00:00:00Z"),
    ];

    let dir = empty_store();
    let root = dir.path();
    for (rel, updated) in layer_internal {
        write(root, rel, &content_md(updated));
    }
    // The derived catalog twin is still skipped at any depth.
    write(root, "records/configs/index.md", "---\ntype: index\n---\n");

    let store = open(&dir);
    let got = rels(&store.walk().unwrap());

    let has = |rel: &str| got.iter().any(|walked| walked == rel);
    assert!(
        has("records/configs/log.md"),
        "layer-internal log.md is content: {got:?}"
    );
    assert!(
        has("sources/docs/DB.md"),
        "layer-internal DB.md is content: {got:?}"
    );
    let index_leaked = got.iter().any(|walked| walked.ends_with("index.md"));
    assert!(!index_leaked, "index.md is still skipped: {got:?}");
}
1795
#[test]
fn walk_layer_is_scoped() {
    let email = "sources/emails/2026/05/a.md";
    let contact = "records/contacts/sarah.md";

    let dir = empty_store();
    write(dir.path(), email, &content_md("2026-05-01T00:00:00Z"));
    write(dir.path(), contact, &content_md("2026-05-02T00:00:00Z"));
    let store = open(&dir);

    // Each layer walk returns only that layer's file.
    for (layer, only_file) in [(Layer::Sources, email), (Layer::Records, contact)] {
        assert_eq!(rels(&store.walk_layer(layer).unwrap()), [only_file]);
    }

    // A layer with no directory is empty, not an error: a store with only a
    // sources/ tree has no records/ dir, so walking Records is empty.
    let only_sources = empty_store();
    write(
        only_sources.path(),
        email,
        &content_md("2026-05-01T00:00:00Z"),
    );
    let sources_only_store = open(&only_sources);
    let records = sources_only_store.walk_layer(Layer::Records).unwrap();
    assert!(records.is_empty());
}
1831
#[test]
fn walk_type_folder_recurses_shards_and_accepts_abs_or_rel() {
    let dir = empty_store();
    let root = dir.path();

    // Two month shards of the same type folder.
    write(
        root,
        "sources/emails/2026/05/a.md",
        &content_md("2026-05-01T00:00:00Z"),
    );
    write(
        root,
        "sources/emails/2026/06/b.md",
        &content_md("2026-06-01T00:00:00Z"),
    );
    // The folder's catalog is skipped.
    write(root, "sources/emails/index.md", "---\ntype: index\n---\n");
    // A different type folder must not leak in.
    write(
        root,
        "sources/docs/2026/05/c.md",
        &content_md("2026-05-04T00:00:00Z"),
    );
    let store = open(&dir);

    let expected = [
        "sources/emails/2026/05/a.md",
        "sources/emails/2026/06/b.md",
    ];
    let relative = Path::new("sources/emails");
    let absolute = root.join(relative);

    // Relative folder arg.
    let via_relative = rels(&store.walk_type_folder(relative).unwrap());
    assert_eq!(via_relative, expected);
    // Absolute folder arg under the store resolves identically.
    let via_absolute = rels(&store.walk_type_folder(&absolute).unwrap());
    assert_eq!(via_absolute, expected);
}
1874
1875 // ── recent_in_type_folder ────────────────────────────────────────────────
1876
#[test]
fn recent_orders_by_updated_desc_then_path_and_caps() {
    let dir = empty_store();
    let root = dir.path();
    for (rel, updated) in [
        // newest
        ("records/meetings/2026/05/c.md", "2026-05-03T00:00:00Z"),
        // tie on updated — path asc decides (a before b)
        ("records/meetings/2026/05/a.md", "2026-05-02T00:00:00Z"),
        ("records/meetings/2026/05/b.md", "2026-05-02T00:00:00Z"),
        // oldest
        ("records/meetings/2026/04/z.md", "2026-04-01T00:00:00Z"),
    ] {
        write(root, rel, &content_md(updated));
    }
    let store = open(&dir);
    let folder = Path::new("records/meetings");

    // A cap above the file count returns everything, fully ordered.
    let all = rels(&store.recent_in_type_folder(folder, 10).unwrap());
    assert_eq!(
        all,
        [
            "records/meetings/2026/05/c.md", // newest
            "records/meetings/2026/05/a.md", // tie, path asc
            "records/meetings/2026/05/b.md",
            "records/meetings/2026/04/z.md", // oldest
        ]
    );

    // Cap takes the n most-recent.
    let top2 = rels(&store.recent_in_type_folder(folder, 2).unwrap());
    assert_eq!(
        top2,
        [
            "records/meetings/2026/05/c.md",
            "records/meetings/2026/05/a.md",
        ]
    );
}
1935
#[test]
fn recent_sorts_undated_files_last() {
    let dir = empty_store();
    let root = dir.path();
    write(
        root,
        "records/contacts/dated.md",
        &content_md("2026-05-01T00:00:00Z"),
    );
    // This one has no `updated` field at all.
    let undated_text = "---\ntype: contact\nsummary: x\n---\nbody\n";
    write(root, "records/contacts/undated.md", undated_text);

    let store = open(&dir);
    let folder = Path::new("records/contacts");
    let ordered = rels(&store.recent_in_type_folder(folder, 10).unwrap());
    assert_eq!(
        ordered,
        ["records/contacts/dated.md", "records/contacts/undated.md"],
        "a file with a real `updated` must outrank one with none"
    );
}
1966
1967 // ── type_shards ──────────────────────────────────────────────────────────
1968
#[test]
fn type_shards_classification() {
    // Types expected to be date-sharded with no schema override.
    const SHARDED: [&str; 9] = [
        "email",
        "transcript",
        "pdf-source",
        "expense",
        "invoice",
        "meeting",
        "order",
        "ticket",
        "transaction",
    ];
    // Types expected to stay in a flat type folder.
    const FLAT: [&str; 8] = [
        "contact", "company", "decision", "profile", "index", "log", "db-md", "proposal",
    ];

    let dir = empty_store();
    let store = open(&dir);
    for t in SHARDED {
        assert!(store.type_shards(t), "{t} should shard");
    }
    for t in FLAT {
        assert!(!store.type_shards(t), "{t} should stay flat");
    }
}
1992
#[test]
fn type_shards_respects_schema_directive_both_directions() {
    use crate::parser::{Config, Schema};

    // A schema that differs from the default only in its `shard` directive.
    let schema_with_shard = |directive: bool| Schema {
        shard: Some(directive),
        ..Schema::default()
    };

    let mut config = Config::default();
    // A CUSTOM type (not in the built-in list) opts into date-sharding —
    // without the schema override `type_shards` would return false for it.
    config
        .schemas
        .insert("shipment".to_string(), schema_with_shard(true));
    // A BUILT-IN event type opts OUT (flat) — the override wins over the
    // built-in default.
    config
        .schemas
        .insert("expense".to_string(), schema_with_shard(false));
    // A schema with no `shard:` directive leaves the built-in default intact.
    config
        .schemas
        .insert("meeting".to_string(), Schema::default());

    let dir = empty_store();
    let mut store = open(&dir);
    store.config = config;

    let shipment = store.type_shards("shipment");
    assert!(shipment, "custom type with `shard: by-date` must shard");

    let expense = store.type_shards("expense");
    assert!(
        !expense,
        "built-in event type with `shard: flat` must go flat"
    );

    let meeting = store.type_shards("meeting");
    assert!(
        meeting,
        "schema without a `shard:` directive keeps the built-in default"
    );

    let contact = store.type_shards("contact");
    assert!(!contact, "unconfigured entity type stays flat");
}
2040
2041 // ── year_month_from_str ──────────────────────────────────────────────────
2042
#[test]
fn year_month_from_str_accepts_unpadded_month() {
    // A single-digit month shards to the same zero-padded folder as its twin,
    // matching the lenient `date`-shape validator (chrono `%Y-%m-%d`).
    let cases = [
        ("2026-1-15", "01"),
        ("2026-01-15", "01"),
        ("2026-12-5", "12"),
        ("2026-1", "01"),
        // Full timestamps still parse off the leading date.
        ("2026-3-22T10:00:00-07:00", "03"),
    ];
    for (input, month) in cases {
        assert_eq!(
            year_month_from_str(input),
            Some(("2026".to_string(), month.to_string())),
            "input {input:?}"
        );
    }
}
2067
#[test]
fn year_month_from_str_rejects_non_dates() {
    // Genuinely non-date input still returns None (behavior unchanged).
    let rejected = [
        "",
        "not-a-date",
        "2026",        // no month part
        "26-1-15",     // year not 4 digits
        "2026-13-01",  // month out of range
        "2026-0-01",   // month zero
        "2026-001-01", // month over 2 digits
        "2026-x-01",   // non-numeric month
        "20a6-1-15",   // non-numeric year
    ];
    for input in rejected {
        assert_eq!(year_month_from_str(input), None, "input {input:?}");
    }
}
2081
#[test]
fn shard_path_accepts_unpadded_month_same_as_padded() {
    // End-to-end: an unpadded `date` shards to its real month, identically to
    // its zero-padded twin — not to the `created`-fallback month.
    let dir = empty_store();
    let store = open(&dir);

    for (date, name) in [("2026-01-15", "padded"), ("2026-1-15", "single")] {
        let frontmatter = fm_with_extra("date", date);
        let path = store
            .shard_path_for("expense", &frontmatter, name)
            .unwrap();
        let expected = format!("records/expenses/2026/01/{name}.md");
        assert_eq!(path, PathBuf::from(expected));
    }
}
2099
2100 // ── shard_path_for ───────────────────────────────────────────────────────
2101
2102 fn fm_with_extra(key: &str, value: &str) -> Frontmatter {
2103 let mut fm = Frontmatter::default();
2104 fm.extra.insert(
2105 key.to_string(),
2106 serde_norway::Value::String(value.to_string()),
2107 );
2108 fm
2109 }
2110
2111 fn fm_with_created(rfc3339: &str) -> Frontmatter {
2112 Frontmatter {
2113 created: Some(DateTime::parse_from_rfc3339(rfc3339).unwrap()),
2114 ..Default::default()
2115 }
2116 }
2117
#[test]
fn shard_path_uses_primary_date_field_per_type() {
    let dir = empty_store();
    let store = open(&dir);

    // (type, primary date field, field value, file name, expected path)
    let cases = [
        // expense.date → records/expenses/<YYYY>/<MM>/
        (
            "expense",
            "date",
            "2026-05-22",
            "lunch",
            "records/expenses/2026/05/lunch.md",
        ),
        // email.date → sources/emails/<YYYY>/<MM>/
        (
            "email",
            "date",
            "2026-11-02T09:00:00-07:00",
            "e1",
            "sources/emails/2026/11/e1.md",
        ),
        // transcript.recorded_at → sources/transcripts/<YYYY>/<MM>/
        (
            "transcript",
            "recorded_at",
            "2025-01-15T12:00:00Z",
            "t1",
            "sources/transcripts/2025/01/t1.md",
        ),
    ];
    for (type_name, field, when, name, expected) in cases {
        let frontmatter = fm_with_extra(field, when);
        let path = store
            .shard_path_for(type_name, &frontmatter, name)
            .unwrap();
        assert_eq!(path, PathBuf::from(expected));
    }
}
2149
#[test]
fn shard_path_falls_back_to_created() {
    let dir = empty_store();
    let store = open(&dir);

    // A meeting with no `date` field but a `created` timestamp.
    let frontmatter = fm_with_created("2024-07-09T08:30:00-04:00");
    let path = store
        .shard_path_for("meeting", &frontmatter, "sync")
        .unwrap();
    assert_eq!(path, PathBuf::from("records/meetings/2024/07/sync.md"));
}
2164
#[test]
fn shard_path_primary_field_wins_over_created() {
    let dir = empty_store();
    let store = open(&dir);

    // Both dates present: `created` says 2020/01, the primary `date` says 2026/05.
    let mut frontmatter = fm_with_created("2020-01-01T00:00:00Z");
    let primary = serde_norway::Value::String("2026-05-22".into());
    frontmatter.extra.insert("date".into(), primary);

    let path = store.shard_path_for("expense", &frontmatter, "x").unwrap();
    // The primary `date` (2026/05), not `created` (2020/01), drives the shard.
    assert_eq!(path, PathBuf::from("records/expenses/2026/05/x.md"));
}
2178
#[test]
fn shard_path_flat_types_have_no_shard_segment() {
    let dir = empty_store();
    let store = open(&dir);

    // A contact has a `created` date, but contacts stay flat.
    let dated_contact = fm_with_created("2026-05-22T00:00:00Z");
    let contact_path = store
        .shard_path_for("contact", &dated_contact, "sarah-chen")
        .unwrap();
    assert_eq!(
        contact_path,
        PathBuf::from("records/contacts/sarah-chen.md")
    );

    // A conclusion `profile` is a custom (non-built-in) type: it is flat (no
    // date shard) and lands under the records-layer fallback folder
    // `records/<type>` — `records/profile/<name>.md`, a conforming 3-component
    // `<layer>/<type-folder>/<file>` path. A 2-component path would be
    // invisible to the index/validate type-folder model.
    let bare = Frontmatter::default();
    let profile_path = store
        .shard_path_for("profile", &bare, "renewal-theme")
        .unwrap();
    assert_eq!(
        profile_path,
        PathBuf::from("records/profile/renewal-theme.md")
    );
}
2203
/// Regression: the toolkit's own path computation must place a type at a path
/// the index + validate type-folder model accepts. `type_folder_of` (in both
/// `index` and `validate`) treats a 2-component `<layer>/<file>` path as
/// having no type-folder — such a path would either crash `Index::on_write`
/// (it tried to create `index.md` inside a file) or be silently dropped from
/// every catalog by `Index::rebuild_all`. A custom (non-built-in) type such as
/// a conclusion `profile` falls back to `records/<type>`, which is still a
/// conforming 3-component `<layer>/<type-folder>/<file>` path.
#[test]
fn shard_path_custom_type_is_indexable_three_component_path() {
    let dir = empty_store();
    let store = open(&dir);
    let p = store
        .shard_path_for("profile", &Frontmatter::default(), "renewal-theme")
        .unwrap();

    // First two components are a layer + a non-empty type-folder segment;
    // the file is the third. This is exactly the shape `type_folder_of`
    // (`comps.len() >= 3`, `comps[0]` a known layer) requires.
    let comps: Vec<&str> = p.iter().filter_map(std::ffi::OsStr::to_str).collect();
    match comps.as_slice() {
        [layer, type_folder, file] => {
            assert_eq!(
                *layer, "records",
                "first component must be the records layer (a custom type is \
                 filed under the records fallback)"
            );
            assert!(
                !type_folder.is_empty() && *type_folder != "renewal-theme.md",
                "second component must be a real type-folder, not the file: {p:?}"
            );
            assert!(
                file.ends_with(".md"),
                "third component must be the .md file: {p:?}"
            );
        }
        _ => panic!("custom-type path must be <layer>/<type-folder>/<file>, got {p:?}"),
    }
}
2243
#[test]
fn shard_path_preserves_and_adds_md_extension() {
    let dir = empty_store();
    let store = open(&dir);
    let bare = Frontmatter::default();

    // A name that already ends in `.md` and one that does not must resolve to
    // the same file.
    for name in ["sarah.md", "sarah"] {
        let path = store.shard_path_for("contact", &bare, name).unwrap();
        assert_eq!(
            path,
            PathBuf::from("records/contacts/sarah.md"),
            "name {name:?}"
        );
    }
}
2257
#[test]
fn shard_path_errors_when_sharding_type_has_no_date() {
    let dir = empty_store();
    let store = open(&dir);

    // expense shards, but no `date` and no `created` → NoShardDate.
    let err = store
        .shard_path_for("expense", &Frontmatter::default(), "mystery")
        .unwrap_err();
    if let StoreError::NoShardDate { file } = &err {
        assert_eq!(*file, PathBuf::from("records/expenses/mystery.md"));
    } else {
        panic!("expected NoShardDate, got {err:?}");
    }
}
2273
2274 // ── find_links_to ────────────────────────────────────────────────────────
2275
#[test]
fn find_links_to_matches_all_accepted_spellings() {
    let target = "records/contacts/sarah-chen";

    let fixtures: [(&str, String); 7] = [
        // Plain link.
        (
            "records/profiles/sarah.md",
            format!(
                "---\ntype: profile\nmeta-type: conclusion\nsummary: s\n---\nSee [[{target}]].\n"
            ),
        ),
        // Link with display text.
        (
            "records/meetings/2026/05/m.md",
            format!("---\ntype: meeting\nsummary: s\n---\nWith [[{target}|Sarah]].\n"),
        ),
        // Link with .md extension (accepted, warned by validate).
        (
            "records/concepts/t.md",
            format!(
                "---\ntype: concept\nmeta-type: conclusion\nsummary: s\n---\n[[{target}.md]]\n"
            ),
        ),
        // A catalog/index file also contains the link literally — included.
        (
            "records/contacts/index.md",
            format!("---\ntype: index\n---\n- [[{target}]] — Sarah\n"),
        ),
        // No link to the target.
        (
            "records/profiles/elena.md",
            "---\ntype: profile\nmeta-type: conclusion\nsummary: s\n---\nNo links here.\n"
                .to_string(),
        ),
        // Short-form link must NOT match the full-path target.
        (
            "records/profiles/bob.md",
            "---\ntype: profile\nmeta-type: conclusion\nsummary: s\n---\n[[sarah-chen]]\n"
                .to_string(),
        ),
        // A longer path that merely starts with the target must NOT match
        // (boundary correctness): target `sarah-chen` vs `sarah-chen-jr`.
        (
            "records/profiles/jr.md",
            format!(
                "---\ntype: profile\nmeta-type: conclusion\nsummary: s\n---\n[[{target}-jr]]\n"
            ),
        ),
    ];

    let dir = empty_store();
    let root = dir.path();
    for (rel, contents) in &fixtures {
        write(root, rel, contents);
    }

    let store = open(&dir);
    let linkers = rels(&store.find_links_to(Path::new(target)).unwrap());
    assert_eq!(
        linkers,
        [
            "records/concepts/t.md",
            "records/contacts/index.md",
            "records/meetings/2026/05/m.md",
            "records/profiles/sarah.md",
        ]
    );
}
2344
#[test]
fn find_links_to_distinguishes_sibling_paths() {
    // Two contacts whose paths share a prefix; a link to one must not be
    // reported as a link to the other.
    let dir = empty_store();
    let root = dir.path();
    write(
        root,
        "records/concepts/a.md",
        "---\ntype: concept\nmeta-type: conclusion\nsummary: s\n---\n[[records/contacts/sarah]]\n",
    );
    write(
        root,
        "records/concepts/b.md",
        "---\ntype: concept\nmeta-type: conclusion\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
    );
    let store = open(&dir);

    // (link target, the only file that links to it)
    let expectations = [
        ("records/contacts/sarah", "records/concepts/a.md"),
        ("records/contacts/sarah-chen", "records/concepts/b.md"),
    ];
    for (target, sole_linker) in expectations {
        let linkers = rels(&store.find_links_to(Path::new(target)).unwrap());
        assert_eq!(linkers, [sole_linker]);
    }
}
2380
2381 #[test]
2382 fn regression_find_links_to_tolerates_invalid_utf8_on_a_matched_line() {
2383 // Regression: a `.md` file can carry a stray non-UTF-8 byte on the SAME
2384 // line as a `[[target]]` link (a verbatim-ingested `sources/` artifact,
2385 // e.g. a mis-decoded Latin-1 import). The scan must still report the
2386 // link — `find_links_to` / `find_links_to_any` (and `graph backlinks` +
2387 // the working-set validate incoming-linker pass) must not error out and
2388 // drop the legitimate UTF-8 linkers. The content scan reads the file
2389 // with `String::from_utf8_lossy`, so the invalid byte becomes a
2390 // replacement char and the ASCII `[[target]]` link is still extracted.
2391 let dir = empty_store();
2392 let root = dir.path();
2393 let target = "records/contacts/sarah-chen";
2394
2395 // A clean, fully-UTF-8 linker that MUST be returned regardless.
2396 write(
2397 root,
2398 "records/profiles/clean.md",
2399 &format!(
2400 "---\ntype: profile\nmeta-type: conclusion\nsummary: s\n---\nSee [[{target}]].\n"
2401 ),
2402 );
2403
2404 // A linker whose link line ALSO carries a stray 0xFF byte (a mis-decoded
2405 // Latin-1 import). Write raw bytes so the invalid byte survives — a
2406 // `&str` fixture could not express it. The byte-level regex still
2407 // matches `[[target]]` on this line; pre-fix the UTF8 sink aborted here.
2408 let mut bytes: Vec<u8> =
2409 b"---\ntype: email\nsummary: s\n---\nSee [[records/contacts/sarah-chen]] \xFF here\n"
2410 .to_vec();
2411 let dirty_abs = root.join("sources/emails/2026/05/raw.md");
2412 fs::create_dir_all(dirty_abs.parent().unwrap()).unwrap();
2413 fs::write(&dirty_abs, &bytes).unwrap();
2414 // Defensive: confirm the fixture really is invalid UTF-8 (so the test
2415 // exercises the bug, not a coincidentally-valid file).
2416 assert!(
2417 std::str::from_utf8(&bytes).is_err(),
2418 "fixture must contain invalid UTF-8 to exercise the regression"
2419 );
2420 bytes.clear();
2421
2422 let store = open(&dir);
2423 let got = rels(
2424 &store
2425 .find_links_to(Path::new(target))
2426 .expect("a stray non-UTF-8 byte must not abort the backlink scan"),
2427 );
2428 assert_eq!(
2429 got,
2430 vec![
2431 "records/profiles/clean.md".to_string(),
2432 "sources/emails/2026/05/raw.md".to_string(),
2433 ],
2434 "both the clean linker and the one with an invalid byte on the link \
2435 line are reported; the scan degrades, it does not fail"
2436 );
2437 }
2438
2439 // ── find_links_to_any (batch — the O(changed × store) fix) ─────────────────
2440
2441 /// The working-set validate's incoming-linker discovery runs through
2442 /// `find_links_to_any` over the WHOLE changed set in one pass. This pins the
2443 /// batch contract that makes that single-pass behavior correct: the result is
2444 /// the union of incoming linkers across every target, with per-target
2445 /// boundary correctness preserved (no alternation arm bleeds into a
2446 /// prefix-sharing sibling). If a regression reverts the batch finder to a
2447 /// per-object loop, the union below would still hold — but the boundary +
2448 /// union-equivalence assertions are what guard the *correctness* of folding N
2449 /// scans into one regex.
2450 #[test]
2451 fn find_links_to_any_returns_the_union_with_boundary_correctness() {
2452 let dir = empty_store();
2453 let root = dir.path();
2454
2455 // Two distinct targets, each with its own linker.
2456 write(
2457 root,
2458 "records/concepts/links-sarah.md",
2459 "---\ntype: concept\nmeta-type: conclusion\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
2460 );
2461 write(
2462 root,
2463 "records/concepts/links-acme.md",
2464 "---\ntype: concept\nmeta-type: conclusion\nsummary: s\n---\nDeal with [[records/companies/acme|Acme]].\n",
2465 );
2466 // One file links to BOTH targets — must appear exactly once (deduped),
2467 // proving the per-file early-exit folds multiple-target hits into a
2468 // single result row rather than one row per matched target.
2469 write(
2470 root,
2471 "records/meetings/2026/05/m.md",
2472 "---\ntype: meeting\nsummary: s\n---\n[[records/contacts/sarah-chen]] re \
2473 [[records/companies/acme]]\n",
2474 );
2475 // A prefix-sharing sibling of a target: a link to `sarah-chen-jr` must NOT
2476 // be reported as a link to `sarah-chen` even though the alternation now
2477 // carries `sarah-chen` as one arm.
2478 write(
2479 root,
2480 "records/concepts/links-jr.md",
2481 "---\ntype: concept\nmeta-type: conclusion\nsummary: s\n---\n[[records/contacts/sarah-chen-jr]]\n",
2482 );
2483 // A file that links to neither requested target.
2484 write(
2485 root,
2486 "records/concepts/unrelated.md",
2487 "---\ntype: concept\nmeta-type: conclusion\nsummary: s\n---\n[[records/concepts/spend]]\n",
2488 );
2489
2490 let store = open(&dir);
2491 let targets = vec![
2492 PathBuf::from("records/contacts/sarah-chen"),
2493 PathBuf::from("records/companies/acme"),
2494 ];
2495
2496 let got = rels(&store.find_links_to_any(&targets).unwrap());
2497 assert_eq!(
2498 got,
2499 vec![
2500 "records/concepts/links-acme.md".to_string(),
2501 "records/concepts/links-sarah.md".to_string(),
2502 "records/meetings/2026/05/m.md".to_string(),
2503 ],
2504 "batch finder must return the deduped union of linkers across all \
2505 targets, excluding the prefix-sibling and the unrelated file"
2506 );
2507
2508 // Equivalence: the batch result must equal the union of the per-target
2509 // single finder. This is the property the working-set path relies on
2510 // when it folds one-scan-per-object into one scan for the whole set.
2511 let mut union: std::collections::BTreeSet<PathBuf> = std::collections::BTreeSet::new();
2512 for t in &targets {
2513 for linker in store.find_links_to(t).unwrap() {
2514 union.insert(linker);
2515 }
2516 }
2517 assert_eq!(
2518 rels(&union.into_iter().collect::<Vec<_>>()),
2519 got,
2520 "find_links_to_any must equal the union of per-target find_links_to"
2521 );
2522 }
2523
2524 /// An empty target set must scan nothing and find nothing — and crucially
2525 /// must NOT compile to a match-everything empty regex (which would report
2526 /// every `.md` as a linker). This is the empty-working-set fast path the
2527 /// `validate` loop hits when nothing changed.
2528 #[test]
2529 fn find_links_to_any_empty_targets_matches_nothing() {
2530 let dir = empty_store();
2531 let root = dir.path();
2532 write(
2533 root,
2534 "records/concepts/a.md",
2535 "---\ntype: concept\nmeta-type: conclusion\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
2536 );
2537 let store = open(&dir);
2538
2539 assert!(
2540 store.find_links_to_any(&[]).unwrap().is_empty(),
2541 "no targets ⇒ no linkers (an empty pattern must not match every file)"
2542 );
2543 // A set of only empty/non-link targets is likewise a no-op, not a
2544 // match-everything.
2545 assert!(
2546 store
2547 .find_links_to_any(&[PathBuf::from(""), PathBuf::from("./")])
2548 .unwrap()
2549 .is_empty(),
2550 "targets that render to empty link text contribute no alternation arm"
2551 );
2552 }
2553
2554 // ── read_type_index ──────────────────────────────────────────────────────
2555
2556 #[test]
2557 fn read_type_index_parses_records_and_flattens_fields() {
2558 let dir = empty_store();
2559 let root = dir.path();
2560 let jsonl = "\
2561{\"path\":\"records/expenses/2026/05/a.md\",\"type\":\"expense\",\"summary\":\"lunch\",\"tags\":[\"meals\"],\"links\":[\"records/companies/acme\"],\"created\":\"2026-05-01T00:00:00Z\",\"updated\":\"2026-05-01T00:00:00Z\",\"vendor\":\"acme\",\"amount\":42}
2562{\"path\":\"records/expenses/2026/05/b.md\",\"type\":\"expense\",\"summary\":\"taxi\",\"created\":null,\"updated\":null,\"vendor\":\"yellow\"}
2563";
2564 let p = write(root, "records/expenses/index.jsonl", jsonl);
2565 let store = open(&dir);
2566 let recs = store.read_type_index(&store.abs_path(&p)).unwrap();
2567
2568 assert_eq!(recs.len(), 2);
2569 // Sorted by path asc.
2570 assert_eq!(recs[0].path, PathBuf::from("records/expenses/2026/05/a.md"));
2571 assert_eq!(recs[0].type_, "expense");
2572 assert_eq!(recs[0].summary, "lunch");
2573 assert_eq!(recs[0].tags, vec!["meals".to_string()]);
2574 assert_eq!(recs[0].links, vec!["records/companies/acme".to_string()]);
2575 assert!(recs[0].created.is_some());
2576 // Extra (non-typed) frontmatter flattens into `fields`.
2577 assert_eq!(
2578 recs[0].fields.get("vendor"),
2579 Some(&serde_json::json!("acme"))
2580 );
2581 assert_eq!(recs[0].fields.get("amount"), Some(&serde_json::json!(42)));
2582 // Defaults: missing tags/links → empty.
2583 assert!(recs[1].tags.is_empty());
2584 assert!(recs[1].links.is_empty());
2585 }
2586
2587 #[test]
2588 fn read_type_index_last_write_wins_and_skips_blanks() {
2589 let dir = empty_store();
2590 let root = dir.path();
2591 // Same path twice; the second line supersedes the first. A blank line
2592 // in between must be ignored, not error.
2593 let jsonl = "\
2594{\"path\":\"records/contacts/sarah.md\",\"type\":\"contact\",\"summary\":\"old\",\"created\":null,\"updated\":null}
2595
2596{\"path\":\"records/contacts/sarah.md\",\"type\":\"contact\",\"summary\":\"new\",\"created\":null,\"updated\":null}
2597";
2598 let p = write(root, "records/contacts/index.jsonl", jsonl);
2599 let store = open(&dir);
2600 let recs = store.read_type_index(&store.abs_path(&p)).unwrap();
2601 assert_eq!(recs.len(), 1, "duplicate path collapses to one record");
2602 assert_eq!(recs[0].summary, "new", "later line must win");
2603 }
2604
2605 #[test]
2606 fn read_type_index_errors_on_malformed_line() {
2607 let dir = empty_store();
2608 let root = dir.path();
2609 let p = write(root, "records/contacts/index.jsonl", "{not valid json}\n");
2610 let store = open(&dir);
2611 let err = store.read_type_index(&store.abs_path(&p)).unwrap_err();
2612 assert!(matches!(err, StoreError::BadTypeIndex { .. }));
2613 }
2614
2615 // ── find_by_type / find_by_where ─────────────────────────────────────────
2616
2617 fn jsonl_line(path: &str, type_: &str, summary: &str, extra: &str) -> String {
2618 format!(
2619 "{{\"path\":\"{path}\",\"type\":\"{type_}\",\"summary\":\"{summary}\",\"created\":null,\"updated\":null{extra}}}\n"
2620 )
2621 }
2622
2623 #[test]
2624 fn find_by_type_reads_canonical_folder_sidecar() {
2625 let dir = empty_store();
2626 let root = dir.path();
2627 // Canonical folder for `contact` is records/contacts.
2628 write(
2629 root,
2630 "records/contacts/index.jsonl",
2631 &(jsonl_line("records/contacts/sarah.md", "contact", "Sarah", "")
2632 + &jsonl_line("records/contacts/elena.md", "contact", "Elena", "")),
2633 );
2634 // A different type's sidecar must not leak into a contact query.
2635 write(
2636 root,
2637 "records/companies/index.jsonl",
2638 &jsonl_line("records/companies/acme.md", "company", "Acme", ""),
2639 );
2640 let store = open(&dir);
2641 let recs = store.find_by_type("contact").unwrap();
2642 let names: Vec<_> = recs.iter().map(|r| r.summary.clone()).collect();
2643 assert_eq!(names, vec!["Elena".to_string(), "Sarah".to_string()]); // path-sorted
2644 assert!(recs.iter().all(|r| r.type_ == "contact"));
2645 }
2646
2647 #[test]
2648 fn regression_find_by_type_includes_non_canonical_folder_when_canonical_exists() {
2649 // Regression for the silent-incompleteness bug: once the canonical
2650 // type-folder sidecar exists, `find_by_type` used to read ONLY that
2651 // sidecar and drop same-type records filed in a non-canonical folder in
2652 // the SAME layer — so the result flipped to incomplete the moment a
2653 // canonical record was added. The write path actively enables such a
2654 // layout (`records/clients/` for a `contact`, any `records/<folder>/`
2655 // for a conclusion `profile`), so this is a reachable, dedup-breaking
2656 // omission.
2657 let dir = empty_store();
2658 let root = dir.path();
2659
2660 // CANONICAL folder sidecar exists (`records/contacts/` for `contact`),
2661 // which is exactly the condition that triggered the bug.
2662 write(
2663 root,
2664 "records/contacts/index.jsonl",
2665 &jsonl_line("records/contacts/sarah.md", "contact", "Sarah", ""),
2666 );
2667 // A `contact` filed in a NON-canonical folder within the same (Records)
2668 // layer. Pre-fix this was silently dropped because the canonical
2669 // sidecar existed; it must now come back.
2670 write(
2671 root,
2672 "records/clients/index.jsonl",
2673 &jsonl_line("records/clients/elena.md", "contact", "Elena", ""),
2674 );
2675 // A different type in the same layer must NOT leak in (proves the read
2676 // is type-filtered, not just a blind whole-layer dump).
2677 write(
2678 root,
2679 "records/companies/index.jsonl",
2680 &jsonl_line("records/companies/acme.md", "company", "Acme", ""),
2681 );
2682
2683 let store = open(&dir);
2684 let got: std::collections::BTreeSet<String> = store
2685 .find_by_type("contact")
2686 .unwrap()
2687 .into_iter()
2688 .map(|r| r.path.to_string_lossy().into_owned())
2689 .collect();
2690 assert_eq!(
2691 got,
2692 ["records/clients/elena.md", "records/contacts/sarah.md"]
2693 .into_iter()
2694 .map(String::from)
2695 .collect::<std::collections::BTreeSet<_>>(),
2696 "both the canonical-folder and the non-canonical-folder contact must \
2697 be returned; the company record must be excluded"
2698 );
2699 }
2700
2701 #[test]
2702 fn regression_find_by_type_profile_spans_multiple_topic_folders() {
2703 // Regression for the scoped-backlinks variant of the same bug
2704 // (`graph backlinks --type <conclusion-type>`): a conclusion type like
2705 // `profile` has the canonical fallback folder `records/profile`, but the
2706 // agent may file profiles under ANY records topic folder
2707 // (`records/people/`, `records/clients/`, …). With a
2708 // `records/profile/index.jsonl` present, the old code read only that
2709 // folder and dropped profiles in the other topic folders —
2710 // under-reporting dependents in a blast-radius check. The
2711 // whole-`records/`-layer read must surface all of them.
2712 let dir = empty_store();
2713 let root = dir.path();
2714 write(
2715 root,
2716 "records/profile/index.jsonl",
2717 &jsonl_line("records/profile/billing.md", "profile", "Billing", ""),
2718 );
2719 write(
2720 root,
2721 "records/people/index.jsonl",
2722 &jsonl_line("records/people/sarah-chen.md", "profile", "Sarah Chen", ""),
2723 );
2724 write(
2725 root,
2726 "records/clients/index.jsonl",
2727 &jsonl_line("records/clients/atlas.md", "profile", "Atlas", ""),
2728 );
2729
2730 let store = open(&dir);
2731 let got: std::collections::BTreeSet<String> = store
2732 .find_by_type("profile")
2733 .unwrap()
2734 .into_iter()
2735 .map(|r| r.path.to_string_lossy().into_owned())
2736 .collect();
2737 assert_eq!(
2738 got,
2739 [
2740 "records/clients/atlas.md",
2741 "records/people/sarah-chen.md",
2742 "records/profile/billing.md",
2743 ]
2744 .into_iter()
2745 .map(String::from)
2746 .collect::<std::collections::BTreeSet<_>>(),
2747 "a profile query must return records from every topic folder, not \
2748 just the canonical records/profile/"
2749 );
2750 }
2751
2752 #[test]
2753 fn find_by_type_canonical_absent_falls_back_within_the_layer_only() {
2754 let dir = empty_store();
2755 let root = dir.path();
2756 // A custom `proposal` record filed under a non-canonical folder NAME
2757 // (the natural plural `records/proposals/`) inside the records layer.
2758 // `default_type_folder("proposal")` = `records/proposal` (bare type, no
2759 // pluralization guess), so the canonical sidecar does not exist and
2760 // `find_by_type` falls back. The fallback is bounded to the type's
2761 // layer (records), so this record — same layer, non-canonical folder —
2762 // is still found: completeness within the layer holds.
2763 write(
2764 root,
2765 "records/proposals/index.jsonl",
2766 &jsonl_line("records/proposals/p1.md", "proposal", "Q3 proposal", ""),
2767 );
2768 // A DECOY of the SAME type sitting in a DIFFERENT layer (sources/). The
2769 // old whole-store fallback read every sidecar in the store and would
2770 // have leaked this into the result; the layer-bounded fallback must not.
2771 // It also pins that the fallback is O(entities-in-layer), never O(store).
2772 write(
2773 root,
2774 "sources/proposals/index.jsonl",
2775 &jsonl_line(
2776 "sources/proposals/leak.md",
2777 "proposal",
2778 "cross-layer decoy",
2779 "",
2780 ),
2781 );
2782 let store = open(&dir);
2783 let recs = store.find_by_type("proposal").unwrap();
2784 assert_eq!(
2785 recs.len(),
2786 1,
2787 "only the records-layer proposal, not the sources decoy"
2788 );
2789 assert_eq!(recs[0].summary, "Q3 proposal");
2790 assert_eq!(recs[0].path, PathBuf::from("records/proposals/p1.md"));
2791 }
2792
2793 #[test]
2794 fn find_by_type_canonical_absent_does_not_read_other_layers() {
2795 let dir = empty_store();
2796 let root = dir.path();
2797 // `email`'s canonical folder is `sources/emails` (layer Sources). No
2798 // sidecar there yet, so `find_by_type("email")` falls back — but only
2799 // within the Sources layer. A populated sidecar in the Records layer
2800 // must never be touched: the fallback is layer-bounded, not store-wide.
2801 // Under the old `read_all_type_indexes_in(None)` fallback this records
2802 // sidecar would have been read and filtered (wasted O(store) I/O); now
2803 // it is outside the walk root entirely.
2804 write(
2805 root,
2806 "records/contacts/index.jsonl",
2807 &jsonl_line("records/contacts/sarah.md", "contact", "Sarah", ""),
2808 );
2809 let store = open(&dir);
2810 // No email anywhere ⇒ empty, and the records layer was not in scope.
2811 assert!(store.find_by_type("email").unwrap().is_empty());
2812 }
2813
2814 #[test]
2815 fn find_by_where_matches_typed_columns_and_flat_fields() {
2816 let dir = empty_store();
2817 let root = dir.path();
2818 write(
2819 root,
2820 "records/expenses/index.jsonl",
2821 &(jsonl_line(
2822 "records/expenses/a.md",
2823 "expense",
2824 "lunch",
2825 ",\"vendor\":\"acme\",\"tags\":[\"meals\"]",
2826 ) + &jsonl_line(
2827 "records/expenses/b.md",
2828 "expense",
2829 "taxi",
2830 ",\"vendor\":\"yellow\"",
2831 )),
2832 );
2833 write(
2834 root,
2835 "records/contacts/index.jsonl",
2836 &jsonl_line(
2837 "records/contacts/sarah.md",
2838 "contact",
2839 "Sarah",
2840 ",\"tags\":[\"customer\"]",
2841 ),
2842 );
2843 let store = open(&dir);
2844
2845 // Flat field in `fields`.
2846 let by_vendor = store.find_by_where("vendor", "acme").unwrap();
2847 assert_eq!(by_vendor.len(), 1);
2848 assert_eq!(by_vendor[0].path, PathBuf::from("records/expenses/a.md"));
2849
2850 // Typed column: type (spans both expense records).
2851 assert_eq!(store.find_by_where("type", "expense").unwrap().len(), 2);
2852
2853 // Typed list column: tags membership.
2854 let customers = store.find_by_where("tags", "customer").unwrap();
2855 assert_eq!(customers.len(), 1);
2856 assert_eq!(
2857 customers[0].path,
2858 PathBuf::from("records/contacts/sarah.md")
2859 );
2860
2861 // No match → empty.
2862 assert!(store.find_by_where("vendor", "nobody").unwrap().is_empty());
2863 }
2864
2865 #[test]
2866 fn find_by_where_matches_timestamps_across_rfc3339_spellings() {
2867 let dir = empty_store();
2868 let root = dir.path();
2869 // db.md files most commonly carry the `Z` UTC spelling. The index.jsonl
2870 // serialized from such a file preserves it verbatim.
2871 write(
2872 root,
2873 "records/meetings/index.jsonl",
2874 "{\"path\":\"records/meetings/kickoff.md\",\"type\":\"meeting\",\
2875\"summary\":\"kickoff\",\"created\":\"2026-05-01T00:00:00Z\",\
2876\"updated\":\"2026-05-02T09:30:00-07:00\"}\n",
2877 );
2878 let store = open(&dir);
2879
2880 // The exact value an agent reads out of the file (`Z` form) must match.
2881 let by_z = store
2882 .find_by_where("created", "2026-05-01T00:00:00Z")
2883 .unwrap();
2884 assert_eq!(by_z.len(), 1);
2885 assert_eq!(by_z[0].path, PathBuf::from("records/meetings/kickoff.md"));
2886
2887 // The equivalent explicit-offset spelling of the same instant matches too.
2888 assert_eq!(
2889 store
2890 .find_by_where("created", "2026-05-01T00:00:00+00:00")
2891 .unwrap()
2892 .len(),
2893 1
2894 );
2895
2896 // A non-UTC stored value matches both its own offset spelling and the
2897 // same instant expressed as `Z` (instant comparison, not string compare).
2898 assert_eq!(
2899 store
2900 .find_by_where("updated", "2026-05-02T09:30:00-07:00")
2901 .unwrap()
2902 .len(),
2903 1
2904 );
2905 assert_eq!(
2906 store
2907 .find_by_where("updated", "2026-05-02T16:30:00Z")
2908 .unwrap()
2909 .len(),
2910 1
2911 );
2912
2913 // A different instant does not match.
2914 assert!(store
2915 .find_by_where("created", "2026-05-01T00:00:01Z")
2916 .unwrap()
2917 .is_empty());
2918 // A non-RFC3339 query value never matches a real timestamp.
2919 assert!(store
2920 .find_by_where("created", "2026-05-01")
2921 .unwrap()
2922 .is_empty());
2923 }
2924
2925 #[test]
2926 fn find_by_where_matches_floats_across_serialized_spellings() {
2927 // Adversarial review #5: a float field is stored in index.jsonl via
2928 // serde_json's canonical f64 render, which DISCARDS the file's source
2929 // spelling (`1234.00` -> `1234.0`, `1e3` -> `1000.0`). A textual compare
2930 // made the spelling a human reads in the file miss (and disagree with
2931 // free-text `search`); numeric compare fixes it. `fm query`/`index query`
2932 // is the SPEC pre-write dedup primitive, so a miss here silently writes a
2933 // duplicate record.
2934 let dir = empty_store();
2935 let root = dir.path();
2936 write(
2937 root,
2938 "records/invoices/index.jsonl",
2939 "{\"path\":\"records/invoices/inv.md\",\"type\":\"invoice\",\
2940\"summary\":\"inv\",\"amount\":1234.0,\"score\":1000.0,\"count\":42}\n",
2941 );
2942 let store = open(&dir);
2943
2944 // Every spelling of the same numeric value matches the canonical-f64 store.
2945 for spelling in ["1234.00", "1234.0", "1234"] {
2946 assert_eq!(
2947 store.find_by_where("amount", spelling).unwrap().len(),
2948 1,
2949 "amount spelling `{spelling}` must match the stored 1234.0"
2950 );
2951 }
2952 for spelling in ["1e3", "1000", "1000.0"] {
2953 assert_eq!(
2954 store.find_by_where("score", spelling).unwrap().len(),
2955 1,
2956 "score spelling `{spelling}` must match the stored 1000.0"
2957 );
2958 }
2959 // A genuinely different value does not match.
2960 assert!(store.find_by_where("amount", "1234.5").unwrap().is_empty());
2961 // Integer fields keep exact textual matching (unaffected by the fix).
2962 assert_eq!(store.find_by_where("count", "42").unwrap().len(), 1);
2963 }
2964
2965 #[test]
2966 fn number_matches_is_numeric_for_floats_but_exact_for_integers() {
2967 use serde_json::Number;
2968 // Float-valued field: any equal spelling matches (the bug fix).
2969 let f: Number = serde_json::from_str("1234.0").unwrap();
2970 assert!(number_matches(&f, "1234.00"));
2971 assert!(number_matches(&f, "1234"));
2972 assert!(number_matches(&f, "1234.0"));
2973 assert!(!number_matches(&f, "1234.5"));
2974 // Integer-valued field: EXACT textual compare, never f64-rounded — two
2975 // adjacent large integers that round to the same f64 must NOT collide
2976 // (the safety property that motivates restricting numeric compare to
2977 // floats).
2978 let big: Number = serde_json::from_str("18446744073709551615").unwrap(); // u64::MAX
2979 assert!(number_matches(&big, "18446744073709551615"));
2980 assert!(!number_matches(&big, "18446744073709551614"));
2981 }
2982
2983 #[test]
2984 fn find_by_where_in_layer_reads_only_that_layers_sidecars() {
2985 // The O(entities-in-layer) contract: a layer-scoped where read must walk
2986 // ONLY the named layer's subtree. Proven structurally — a *malformed*
2987 // sidecar in another layer would make `read_type_index` error if it were
2988 // read, so a scoped read that succeeds (and excludes that record) is
2989 // proof the other layer's I/O never happened.
2990 let dir = empty_store();
2991 let root = dir.path();
2992 write(
2993 root,
2994 "records/companies/index.jsonl",
2995 &jsonl_line(
2996 "records/companies/acme.md",
2997 "company",
2998 "Acme",
2999 ",\"domain\":\"acme.com\"",
3000 ),
3001 );
3002 // Same field/value in the sources layer — but the sidecar is corrupt.
3003 write(
3004 root,
3005 "sources/emails/index.jsonl",
3006 "{ this is not valid json and would error if read }\n",
3007 );
3008 let store = open(&dir);
3009
3010 // Scoped to records: the corrupt sources sidecar is out of scope, so the
3011 // read succeeds and returns only the records-layer match.
3012 let in_records = store
3013 .find_by_where_in("domain", "acme.com", Some(Layer::Records))
3014 .expect("a records-scoped read must not touch the sources sidecar");
3015 assert_eq!(
3016 rels(
3017 &in_records
3018 .iter()
3019 .map(|r| r.path.clone())
3020 .collect::<Vec<_>>()
3021 ),
3022 vec!["records/companies/acme.md".to_string()]
3023 );
3024
3025 // The store-wide read DOES reach the corrupt sidecar and surfaces it as
3026 // a parse error — confirming the corrupt file is genuinely in the tree
3027 // and that only the layer scope spares it.
3028 let store_wide = store.find_by_where("domain", "acme.com");
3029 assert!(
3030 matches!(store_wide, Err(StoreError::BadTypeIndex { .. })),
3031 "unscoped read walks every layer and hits the corrupt sidecar"
3032 );
3033
3034 // Scoping to the layer that holds only the corrupt sidecar still errors
3035 // (the scope includes it), proving the scope is a real subtree bound and
3036 // not a silent "skip anything that fails".
3037 let in_sources = store.find_by_where_in("domain", "acme.com", Some(Layer::Sources));
3038 assert!(matches!(in_sources, Err(StoreError::BadTypeIndex { .. })));
3039 }
3040
3041 #[test]
3042 fn find_by_where_in_missing_layer_is_empty_not_an_error() {
3043 // A layer-scoped read over a layer folder that does not exist yet must
3044 // return empty (mirrors `walk_layer`'s missing-dir guard), never a walk
3045 // error from `ignore` over a nonexistent path.
3046 let dir = empty_store();
3047 let root = dir.path();
3048 write(
3049 root,
3050 "records/contacts/index.jsonl",
3051 &jsonl_line(
3052 "records/contacts/sarah.md",
3053 "contact",
3054 "Sarah",
3055 ",\"city\":\"denver\"",
3056 ),
3057 );
3058 let store = open(&dir);
3059
3060 // `sources/` was never created.
3061 let in_sources = store
3062 .find_by_where_in("city", "denver", Some(Layer::Sources))
3063 .expect("missing layer subtree is empty, not an error");
3064 assert!(in_sources.is_empty());
3065
3066 // Same query scoped to the layer that has the record still finds it.
3067 let in_records = store
3068 .find_by_where_in("city", "denver", Some(Layer::Records))
3069 .unwrap();
3070 assert_eq!(in_records.len(), 1);
3071 }
3072
3073 // ── abs_path / rel_path ──────────────────────────────────────────────────
3074
3075 #[test]
3076 fn abs_and_rel_path_roundtrip() {
3077 let dir = empty_store();
3078 let store = open(&dir);
3079 let rel = Path::new("records/contacts/sarah.md");
3080 let abs = store.abs_path(rel);
3081 assert_eq!(abs, dir.path().join(rel));
3082 assert_eq!(store.rel_path(&abs).as_deref(), Some(rel));
3083
3084 // An absolute path is passed through unchanged by abs_path.
3085 assert_eq!(store.abs_path(&abs), abs);
3086
3087 // A path outside the store has no store-relative form.
3088 assert_eq!(store.rel_path(Path::new("/somewhere/else.md")), None);
3089 }
3090
3091 // ── infer_type_from_path (inverse of default_type_folder) ────────────────
3092
3093 #[test]
3094 fn infer_type_maps_every_recognized_folder_back_to_its_type() {
3095 let cases = [
3096 ("sources/emails/x.md", "email"),
3097 ("sources/transcripts/x.md", "transcript"),
3098 ("sources/docs/x.md", "pdf-source"),
3099 ("sources/notes/x.md", "note"),
3100 ("records/contacts/x.md", "contact"),
3101 ("records/companies/x.md", "company"),
3102 ("records/expenses/x.md", "expense"),
3103 ("records/meetings/x.md", "meeting"),
3104 ("records/decisions/x.md", "decision"),
3105 ("records/invoices/x.md", "invoice"),
3106 ];
3107 for (path, expected) in cases {
3108 assert_eq!(
3109 infer_type_from_path(Path::new(path)).as_deref(),
3110 Some(expected),
3111 "path {path} should infer type {expected}"
3112 );
3113 }
3114 }
3115
3116 #[test]
3117 fn infer_type_round_trips_with_default_type_folder() {
3118 // The canonical invariant: inference is the inverse of the forward map.
3119 // Every recognized type, routed through `default_type_folder` and then
3120 // back through `infer_type_from_path`, must return the original type.
3121 let recognized = [
3122 "email",
3123 "transcript",
3124 "pdf-source",
3125 "contact",
3126 "company",
3127 "expense",
3128 "meeting",
3129 "decision",
3130 "invoice",
3131 ];
3132 for type_ in recognized {
3133 let folder = default_type_folder(type_);
3134 let file = folder.join("x.md");
3135 assert_eq!(
3136 infer_type_from_path(&file).as_deref(),
3137 Some(type_),
3138 "recognized type {type_} (folder {folder:?}) must round-trip"
3139 );
3140 }
3141 }
3142
3143 #[test]
3144 fn infer_type_round_trips_custom_types_verbatim_no_singularization() {
3145 // Regression guard for the CLI/core divergence: `default_type_folder`'s
3146 // unrecognized fallback is the BARE type name (`task → records/task`,
3147 // `tasks → records/tasks`). Inference must NOT singularize, or a custom
3148 // type would not round-trip (e.g. `records/tasks` → `task` would clash
3149 // with `default_type_folder("task") → records/task`).
3150 for custom in ["task", "tasks", "playbook", "process", "okrs", "ticket"] {
3151 let folder = default_type_folder(custom);
3152 assert_eq!(folder, PathBuf::from("records").join(custom));
3153 let file = folder.join("x.md");
3154 assert_eq!(
3155 infer_type_from_path(&file).as_deref(),
3156 Some(custom),
3157 "custom type {custom} must round-trip verbatim (no singularization)"
3158 );
3159 }
3160
3161 // The specific case named in the finding: a plural custom folder keeps
3162 // its trailing `s`; it is NOT singularized to `task`.
3163 assert_eq!(
3164 infer_type_from_path(Path::new("records/tasks/x.md")).as_deref(),
3165 Some("tasks"),
3166 "records/tasks must infer `tasks`, not `task`"
3167 );
3168 }
3169
3170 #[test]
3171 fn infer_type_requires_three_component_layer_folder_file_shape() {
3172 // Fewer than 3 components: a file directly under a layer has no
3173 // type-folder, so inference yields None (matches the old CLI contract).
3174 assert_eq!(infer_type_from_path(Path::new("records/x.md")), None);
3175 assert_eq!(infer_type_from_path(Path::new("sources/x.md")), None);
3176 assert_eq!(infer_type_from_path(Path::new("x.md")), None);
3177 // Unknown leading layer is never inferred.
3178 assert_eq!(infer_type_from_path(Path::new("foo/bar/x.md")), None);
3179 // Deeper paths still infer from the first type-folder segment (e.g. a
3180 // sharded record under records/expenses/2026/05/x.md).
3181 assert_eq!(
3182 infer_type_from_path(Path::new("records/expenses/2026/05/x.md")).as_deref(),
3183 Some("expense"),
3184 );
3185 }
3186
3187 // ── ensure_path_within_store (containment) ───────────────────────────────
3188
3189 #[test]
3190 fn ensure_path_within_store_accepts_in_store_and_rejects_escape() {
3191 let dir = tempdir().unwrap();
3192 let root = dir.path();
3193 fs::create_dir_all(root.join("records/contacts")).unwrap();
3194 fs::write(root.join("records/contacts/sarah.md"), "x").unwrap();
3195
3196 // An existing in-store file resolves and is accepted.
3197 let inside = root.join("records/contacts/sarah.md");
3198 let got = ensure_path_within_store(root, &inside).expect("in-store path accepted");
3199 // Canonical, but still under the (canonical) root.
3200 assert!(got.starts_with(root.canonicalize().unwrap()));
3201
3202 // A not-yet-existing in-store leaf is accepted (rename destination).
3203 let new_leaf = root.join("records/contacts/sarah-chen.md");
3204 assert!(
3205 ensure_path_within_store(root, &new_leaf).is_ok(),
3206 "a non-existent in-store leaf must be accepted"
3207 );
3208
3209 // A `..`-escaping path is rejected even though its prefix exists.
3210 let escape = root.join("records/contacts/../../outside/secret.md");
3211 assert!(
3212 ensure_path_within_store(root, &escape).is_err(),
3213 "a `..`-escaping path must be rejected"
3214 );
3215 }
3216
3217 #[test]
3218 fn ensure_path_within_store_rejects_symlink_escape() {
3219 let dir = tempdir().unwrap();
3220 let root = dir.path().join("store");
3221 fs::create_dir_all(&root).unwrap();
3222 let outside_dir = dir.path().join("outside");
3223 fs::create_dir_all(&outside_dir).unwrap();
3224 let secret = outside_dir.join("secret.md");
3225 fs::write(&secret, "TOPSECRET").unwrap();
3226
3227 // A symlink inside the store that points OUTSIDE it must be rejected:
3228 // resolving the symlink lands outside the canonical root.
3229 #[cfg(unix)]
3230 {
3231 use std::os::unix::fs::symlink;
3232 let link = root.join("escape.md");
3233 symlink(&secret, &link).unwrap();
3234 assert!(
3235 ensure_path_within_store(&root, &link).is_err(),
3236 "a symlink resolving outside the store must be rejected"
3237 );
3238 }
3239 }
3240
3241 // ── shared link-edge notion (fence / whitespace / case) ──────────────────
3242
3243 #[test]
3244 fn extract_edge_targets_trims_inner_whitespace() {
3245 // Padded `[[ x ]]` is the same edge as `[[x]]`.
3246 assert_eq!(
3247 extract_edge_targets("See [[ records/contacts/sarah ]] today."),
3248 vec!["records/contacts/sarah".to_string()]
3249 );
3250 }
3251
3252 #[test]
3253 fn extract_edge_targets_skips_fenced_code_blocks() {
3254 // A `[[...]]` inside a ``` fence is a doc example, NOT an edge — matching
3255 // validate's body extractor.
3256 let body = "\
3257Real [[records/contacts/sarah]] link.
3258
3259```markdown
3260[[records/contacts/ghost-example]] is how you link.
3261```
3262
3263After fence [[records/companies/acme]].
3264";
3265 let got = extract_edge_targets(body);
3266 assert_eq!(
3267 got,
3268 vec![
3269 "records/contacts/sarah".to_string(),
3270 "records/companies/acme".to_string(),
3271 ],
3272 "fenced example link must not be an edge"
3273 );
3274 }
3275
3276 #[test]
3277 fn extract_edge_targets_frontmatter_fence_does_not_swallow_body_links() {
3278 // Regression: `search_by_link` / `forwardlinks` / `dbmd links` feed the
3279 // WHOLE file (frontmatter + body) here. A stray code-fence run inside a
3280 // frontmatter value must NOT open a markdown fence that swallows the
3281 // body's real wiki-links. Frontmatter links are still edges; a link
3282 // genuinely inside a BODY fence is still ignored.
3283 let file = "\
3284---
3285type: note
3286summary: \"a note\"
3287ref: \"[[records/contacts/sarah]]\"
3288snippet: \"```\"
3289---
3290
3291Body mentions [[records/companies/acme]].
3292
3293```
3294[[records/contacts/ghost-example]] inside a body fence.
3295```
3296
3297After fence [[records/contacts/dave]].
3298";
3299 let got = extract_edge_targets(file);
3300 assert_eq!(
3301 got,
3302 vec![
3303 "records/contacts/sarah".to_string(), // frontmatter edge
3304 "records/companies/acme".to_string(), // body edge AFTER the frontmatter ```
3305 "records/contacts/dave".to_string(), // body edge after a real body fence
3306 ],
3307 "a code fence inside frontmatter must not suppress body wiki-links, \
3308 and a real body-fenced link must still be ignored"
3309 );
3310 }
3311
3312 #[test]
3313 fn extract_edge_targets_handles_nested_indented_and_long_run_fences() {
3314 // Regression for the naive `starts_with("```")/("~~~")` toggle: a fence
3315 // nested inside another, an over-indented (>3 space) marker, and a
3316 // long-run fence wrapping a shorter inner one must all leave the block's
3317 // links un-extracted (validate treats the whole block as opaque). The
3318 // (char, run-length) tracker keys on the OPENING fence and closes only on
3319 // a matching char with run ≥ the opener.
3320
3321 // (a) A ```` ```` ````-run block (run 4) wrapping a ``` example (run 3).
3322 // The inner ``` does NOT close the outer run-4 fence, so both `[[...]]`
3323 // inside stay fenced.
3324 let nested = "\
3325Doc:
3326
3327````
3328```
3329[[records/contacts/bob]]
3330```
3331still fenced [[records/contacts/bob]]
3332````
3333
3334Real [[records/companies/acme]].
3335";
3336 assert_eq!(
3337 extract_edge_targets(nested),
3338 vec!["records/companies/acme".to_string()],
3339 "a nested ``` inside a ````-run fence must not leak the fenced links"
3340 );
3341
3342 // (b) A `~~~` block containing a ``` line (the standard way to document a
3343 // backtick fence). The inner backtick line must not flip the state.
3344 let tilde_wraps_backtick = "\
3345~~~
3346```
3347[[records/contacts/ghost]]
3348```
3349~~~
3350
3351After [[records/companies/acme]].
3352";
3353 assert_eq!(
3354 extract_edge_targets(tilde_wraps_backtick),
3355 vec!["records/companies/acme".to_string()],
3356 "a ``` line inside a ~~~ block must not invert the fence state"
3357 );
3358
3359 // (c) An over-indented ```` ``` ```` (4 spaces) is NOT a fence; the link
3360 // on the next line is live.
3361 let over_indented = " ```\nLive [[records/contacts/sarah]].\n";
3362 assert_eq!(
3363 extract_edge_targets(over_indented),
3364 vec!["records/contacts/sarah".to_string()],
3365 "a >3-space-indented ``` is not a fence opener"
3366 );
3367 }
3368
3369 #[test]
3370 fn canonical_link_target_strips_md_dotslash_and_trims() {
3371 assert_eq!(canonical_link_target(" records/x.md "), "records/x");
3372 assert_eq!(canonical_link_target("./records/y"), "records/y");
3373 assert_eq!(canonical_link_target("/records/z"), "records/z");
3374 }
3375
3376 #[test]
3377 fn link_edge_key_folds_case_only_on_case_insensitive_fs() {
3378 let a = link_edge_key("records/contacts/Sarah-Chen");
3379 let b = link_edge_key("records/contacts/sarah-chen");
3380 if fs_is_case_insensitive() {
3381 assert_eq!(a, b, "case-insensitive FS must fold the key");
3382 } else {
3383 assert_ne!(a, b, "case-sensitive FS must keep the key case-exact");
3384 }
3385 }
3386
3387 #[test]
3388 fn link_edge_key_unifies_nfc_and_nfd_normalization_forms() {
3389 // REGRESSION (Unicode encoding / silent graph break): on macOS/APFS a
3390 // file written in one Unicode normalization form and a link written in
3391 // the other name the SAME file (the FS folds NFC/NFD), but their raw
3392 // bytes differ. The edge comparison key must fold them to one key on
3393 // every platform, or the graph (backlinks/forwardlinks/orphans) keys the
3394 // two as different targets and silently misses the edge.
3395 let nfc = "records/contacts/jos\u{00e9}"; // é = U+00E9 (NFC)
3396 let nfd = "records/contacts/jose\u{0301}"; // e + U+0301 (NFD)
3397 // The two inputs are genuinely byte-different (the test would be vacuous
3398 // otherwise).
3399 assert_ne!(nfc, nfd, "test inputs must be byte-distinct NFC vs NFD");
3400 assert_eq!(
3401 link_edge_key(nfc),
3402 link_edge_key(nfd),
3403 "NFC and NFD spellings of the same name must produce one edge key"
3404 );
3405 }
3406
3407 // ── walk follows symlinked content ───────────────────────────────────────
3408
3409 #[cfg(unix)]
3410 #[test]
3411 fn walk_includes_symlinked_content_file_and_symlinked_folder() {
3412 use std::os::unix::fs::symlink;
3413 let dir = empty_store();
3414 let root = dir.path();
3415 // A regular file (control).
3416 write(
3417 root,
3418 "records/contacts/sarah.md",
3419 &content_md("2026-05-01T00:00:00Z"),
3420 );
3421 // A symlinked .md content file inside a real folder.
3422 let external_file = root.join("external-elena.md");
3423 fs::write(&external_file, content_md("2026-05-02T00:00:00Z")).unwrap();
3424 symlink(&external_file, root.join("records/contacts/elena.md")).unwrap();
3425 // A symlinked type folder.
3426 let external_dir = dir.path().join("external-companies");
3427 fs::create_dir_all(&external_dir).unwrap();
3428 fs::write(
3429 external_dir.join("acme.md"),
3430 content_md("2026-05-03T00:00:00Z"),
3431 )
3432 .unwrap();
3433 symlink(&external_dir, root.join("records/companies")).unwrap();
3434
3435 let store = open(&dir);
3436 let got = rels(&store.walk().unwrap());
3437 assert!(
3438 got.contains(&"records/contacts/elena.md".to_string()),
3439 "a symlinked content file must be walked: {got:?}"
3440 );
3441 assert!(
3442 got.contains(&"records/companies/acme.md".to_string()),
3443 "a file inside a symlinked type folder must be walked: {got:?}"
3444 );
3445 }
3446
3447 // ── find_links_to: padded / fenced / case ────────────────────────────────
3448
3449 #[test]
3450 fn find_links_to_matches_whitespace_padded_link() {
3451 let dir = empty_store();
3452 let root = dir.path();
3453 write(
3454 root,
3455 "records/profiles/a.md",
3456 "---\ntype: profile\nmeta-type: conclusion\nsummary: s\n---\nSee [[ records/contacts/sarah ]] today.\n",
3457 );
3458 let store = open(&dir);
3459 let got = rels(
3460 &store
3461 .find_links_to(Path::new("records/contacts/sarah"))
3462 .unwrap(),
3463 );
3464 assert_eq!(
3465 got,
3466 vec!["records/profiles/a.md".to_string()],
3467 "a padded `[[ x ]]` link must be found as a backward edge, matching forwardlinks"
3468 );
3469 }
3470
3471 #[test]
3472 fn find_links_to_ignores_fenced_example_link() {
3473 let dir = empty_store();
3474 let root = dir.path();
3475 write(
3476 root,
3477 "records/concepts/howto.md",
3478 "---\ntype: concept\nmeta-type: conclusion\nsummary: s\n---\n```markdown\n[[records/contacts/sarah]]\n```\n",
3479 );
3480 let store = open(&dir);
3481 let got = store
3482 .find_links_to(Path::new("records/contacts/sarah"))
3483 .unwrap();
3484 assert!(
3485 got.is_empty(),
3486 "a `[[...]]` only inside a fenced code block is not a backward edge: {got:?}"
3487 );
3488 }
3489
3490 #[cfg(unix)]
3491 #[test]
3492 fn find_links_to_matches_case_variant_on_case_insensitive_fs() {
3493 // Only meaningful on a case-insensitive filesystem; on a case-sensitive
3494 // one the case-variant link is genuinely a different target.
3495 if !fs_is_case_insensitive() {
3496 return;
3497 }
3498 let dir = empty_store();
3499 let root = dir.path();
3500 write(
3501 root,
3502 "records/profiles/bio.md",
3503 "---\ntype: profile\nmeta-type: conclusion\nsummary: s\n---\nSee [[records/contacts/Sarah-Chen]].\n",
3504 );
3505 let store = open(&dir);
3506 let got = rels(
3507 &store
3508 .find_links_to(Path::new("records/contacts/sarah-chen"))
3509 .unwrap(),
3510 );
3511 assert_eq!(
3512 got,
3513 vec!["records/profiles/bio.md".to_string()],
3514 "a case-variant link must be found on a case-insensitive filesystem"
3515 );
3516 }
3517}