Skip to main content

dbmd_core/
index.rs

1//! `index` — the hierarchical content catalog.
2//!
3//! A uniform three-level tree: root + per-layer + per-type-folder. **Two
4//! artifacts per type-folder:** the human `index.md` (capped 500, recency
5//! browse) and the machine `index.jsonl` (complete, structured — one JSON
6//! object per file). Both read `summary` + key frontmatter fields + links
7//! directly from each file — there is no extraction logic here.
8//!
9//! **Maintained write-through** by the write commands ([`Index::on_write`] /
10//! [`Index::on_rename`] / [`Index::on_remove`] — the loop path, O(changed), no
11//! store walk); [`Index::rebuild_all`] is the from-scratch SWEEP repair.
12//!
13//! **Key invariant:** write-through must produce a byte-identical `index.md`
14//! and (post-compaction) `index.jsonl` to a full [`Index::rebuild_all`] over
15//! the same end state — the loop path can never drift from the repair path.
16//!
17//! # Implementation notes (deviations the reader should know)
18//!
19//! - **Self-contained, by design.** This module does its own shard-aware folder
20//!   walk, its own minimal frontmatter read, and its own atomic write, using
21//!   only `store.root` (a public field) and the `serde_yml` / `serde_json` /
22//!   `chrono` / `walkdir` crates rather than routing through the sibling
23//!   `store`/`parser` helpers ([`Store::walk_type_folder`],
24//!   [`Store::recent_in_type_folder`], [`parser::read_file`], …). The index has
25//!   to stamp a *deterministic* `updated:` and emit a *canonical, compacted*
26//!   `index.jsonl` (see the two notes below); keeping the read/walk/write local
27//!   is what makes the byte-identity invariant a true byte comparison, free of
28//!   any incidental formatting the shared readers might introduce. The public
29//!   signatures in `lib.rs` are untouched.
30//! - **Deterministic `updated:` on the index files themselves.** An index's own
31//!   `updated` frontmatter is derived as the max `updated` over the files it
32//!   catalogs (max over children for root/layer) — NOT wall-clock-now. This is
33//!   what makes the byte-identity invariant a *true* byte comparison: a
34//!   write-through write and a `rebuild_all` over the same end state stamp the
35//!   same value. (The SPEC's rendered examples show a wall-clock-looking value;
36//!   the conventions list only requires `updated: <RFC3339>`, and the
37//!   property-tested invariant dominates.)
38//! - **`index.jsonl` is always compacted.** Write-through rewrites the affected
39//!   type-folder's jsonl in canonical form (one current line per path, recency
40//!   order) rather than appending superseded/tombstone lines, so the jsonl is
41//!   byte-identical to `rebuild_all` *immediately* (a strictly stronger
42//!   guarantee than the SPEC's "post-compaction"). This keeps the loop cost at
43//!   one sidecar read + one rewrite per touched type-folder — O(folder), the
44//!   sanctioned loop primitive, never a whole-`Store::walk`.
45//! - **Root/layer entry styling** follows plan §index (`(N)` numeric counts;
46//!   layer headings in the root carry the layer's total count) which is more
47//!   specific than the SPEC's illustrative `(42 files)` prose example. Type
48//!   folders are listed alphabetically (a deterministic order a derived artifact
49//!   needs); `scope: type-folder` follows the conventions list, not the one
50//!   SPEC example that wrote `scope: folder`.
51
52use std::collections::{BTreeMap, BTreeSet};
53use std::fs;
54use std::io::Write as _;
55use std::path::{Path, PathBuf};
56
57use chrono::{DateTime, FixedOffset, SecondsFormat};
58use serde::{Deserialize, Serialize};
59use serde_json::Value;
60
61use crate::store::{Layer, Store};
62
/// The browse-view cap for a type-folder `index.md`: at most this many entries
/// are rendered, and a folder holding more gets a `## More` footer pointing at
/// the complete catalog.
const MD_CAP: usize = 500;

/// Placeholder summary for a content file that has no `summary` frontmatter.
/// The index never invents a real summary — that is `dbmd fm init`'s job; this
/// marker is what `dbmd validate` keys off (`INDEX`-class issue). Layer
/// rollups suppress it rather than showing it as a preview.
const MISSING_SUMMARY: &str = "(no summary)";

/// The root `index.md` H1 (rendered as `# Knowledge base index`).
const ROOT_TITLE: &str = "Knowledge base index";
73
/// Which level of the catalog an [`Index`] represents.
///
/// The level selects both the renderer used by [`Index::to_markdown`] and the
/// set of artifacts written to disk: only the type-folder level carries an
/// `index.jsonl` next to its `index.md`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexLevel {
    /// The store-wide root `index.md` (layers + per-type counts).
    Root,
    /// A layer `index.md` (every type-folder under one layer).
    Layer(Layer),
    /// A type-folder `index.md` + `index.jsonl` (every file in the folder).
    /// The payload is the store-relative folder path.
    TypeFolder(PathBuf),
}
84
/// One record in a type-folder's `index.jsonl` — the complete, structured twin
/// of a single `index.md` browse entry.
///
/// `tags` are the document's flat labels; `links` are its concept/relationship
/// wiki-link targets. Both are copied verbatim from the file — never inferred.
/// `fields` holds the remaining type-specific frontmatter so the structured
/// query path can filter on any key without opening the file.
///
/// The declaration order of the fields below is load-bearing: it is the key
/// order of every serialized jsonl line (declared fields first, then the
/// flattened `fields` map in sorted key order), so reordering fields changes
/// the on-disk bytes.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndexRecord {
    /// Store-relative path of the file (the upsert key; last-write-wins).
    pub path: PathBuf,
    /// The file's `type`.
    #[serde(rename = "type")]
    pub type_: String,
    /// The file's `summary`.
    pub summary: String,
    /// The file's flat `tags` (empty when the key is absent from a jsonl line).
    #[serde(default)]
    pub tags: Vec<String>,
    /// The file's concept/relationship wiki-link targets (store-relative;
    /// empty when the key is absent from a jsonl line).
    #[serde(default)]
    pub links: Vec<String>,
    /// `created` timestamp.
    pub created: Option<DateTime<FixedOffset>>,
    /// `updated` timestamp (the recency key for the `index.md` cap order).
    pub updated: Option<DateTime<FixedOffset>>,
    /// Remaining type-specific frontmatter fields, verbatim. Flattened into
    /// the top level of the JSON object rather than nested under a key.
    #[serde(flatten)]
    pub fields: BTreeMap<String, Value>,
}
115
/// A built (or being-built) catalog for one [`IndexLevel`], with both rendered
/// artifacts available. Pure data until written via [`Index::write_level`].
///
/// Exactly one of `records` / `child_counts` is populated by the builders:
/// type-folder catalogs fill `records`, root/layer rollups fill
/// `child_counts`.
#[derive(Debug, Clone, PartialEq)]
pub struct Index {
    /// Which level this catalog is for.
    pub level: IndexLevel,
    /// The complete record set for this level (type-folder level; empty for
    /// root/layer rollups, which carry only counts). Kept in the canonical
    /// recency order so the first `MD_CAP` entries form the browse view.
    pub records: Vec<IndexRecord>,
    /// Per-child counts for root/layer rollups (child path → file count).
    /// Only non-empty children are present.
    pub child_counts: BTreeMap<PathBuf, usize>,
}
128
129impl Index {
130    /// Build a type-folder catalog by aggregating across date-shards, producing
131    /// both artifacts. `index.md` selection is recency (updated desc, ties by
132    /// path asc; cap 500 with a `## More` footer over the cap); `index.jsonl`
133    /// holds every file. A file missing `summary` gets a placeholder + a
134    /// validate-detectable issue (the index never invents summaries).
135    pub fn build_type_folder(store: &Store, type_folder: &Path) -> crate::Result<Index> {
136        let rel = normalize_rel(type_folder);
137        let abs = store.root.join(&rel);
138        let mut records = Vec::new();
139        for file_abs in walk_type_folder_files(&abs) {
140            let rel_path =
141                rel_to_store(&store.root, &file_abs).expect("walked file is under the store root");
142            records.push(record_from_file(&file_abs, rel_path)?);
143        }
144        sort_records(&mut records);
145        Ok(Index {
146            level: IndexLevel::TypeFolder(rel),
147            records,
148            child_counts: BTreeMap::new(),
149        })
150    }
151
152    /// Build a layer catalog: every non-empty type-folder under the layer with
153    /// `(N)` counts and a newest-file `summary` preview (≤ 80 chars).
154    pub fn build_layer(store: &Store, layer: Layer) -> crate::Result<Index> {
155        let mut child_counts = BTreeMap::new();
156        for tf in type_folders_in_layer(store, layer) {
157            let abs = store.root.join(&tf);
158            let n = walk_type_folder_files(&abs).len();
159            if n > 0 {
160                child_counts.insert(tf, n);
161            }
162        }
163        Ok(Index {
164            level: IndexLevel::Layer(layer),
165            records: Vec::new(),
166            child_counts,
167        })
168    }
169
170    /// Build the store-wide root catalog: one heading per non-empty layer with
171    /// total count + bulleted per-type sub-entries with `(N)` counts.
172    pub fn build_root(store: &Store) -> crate::Result<Index> {
173        let mut child_counts = BTreeMap::new();
174        for layer in Layer::all() {
175            for tf in type_folders_in_layer(store, layer) {
176                let abs = store.root.join(&tf);
177                let n = walk_type_folder_files(&abs).len();
178                if n > 0 {
179                    child_counts.insert(tf, n);
180                }
181            }
182        }
183        Ok(Index {
184            level: IndexLevel::Root,
185            records: Vec::new(),
186            child_counts,
187        })
188    }
189
190    /// Render this catalog as a canonical `index.md`.
191    pub fn to_markdown(&self) -> String {
192        match &self.level {
193            IndexLevel::TypeFolder(folder) => self.render_type_folder_md(folder),
194            IndexLevel::Layer(layer) => self.render_layer_md(*layer),
195            IndexLevel::Root => self.render_root_md(),
196        }
197    }
198
199    /// Render this type-folder catalog as the complete `index.jsonl` (one JSON
200    /// object per file, stable key order so diffs stay minimal). Type-folder
201    /// level only — root and layer stay markdown rollups.
202    pub fn to_jsonl(&self) -> String {
203        let mut out = String::new();
204        for rec in &self.records {
205            // The record type derives a deterministic, sorted key order
206            // (declared fields first, then the flattened `fields` BTreeMap).
207            let line = serde_json::to_string(rec).expect("IndexRecord serializes");
208            out.push_str(&line);
209            out.push('\n');
210        }
211        out
212    }
213
214    // ── rendering helpers ────────────────────────────────────────────────
215
216    fn render_type_folder_md(&self, folder: &Path) -> String {
217        let folder_disp = path_to_unix(folder);
218        let updated = max_updated(self.records.iter().map(|r| r.updated.as_ref()));
219        let mut s = String::new();
220        s.push_str("---\n");
221        s.push_str("type: index\n");
222        s.push_str("scope: type-folder\n");
223        s.push_str(&format!("folder: {folder_disp}\n"));
224        if let Some(ts) = updated {
225            s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
226        }
227        s.push_str("---\n\n");
228        s.push_str(&format!("# {folder_disp}\n\n"));
229
230        let shown = self.records.len().min(MD_CAP);
231        for rec in self.records.iter().take(shown) {
232            s.push_str(&format_md_entry(rec));
233            s.push('\n');
234        }
235
236        if self.records.len() > MD_CAP {
237            let type_ = self.records.first().map(|r| r.type_.as_str()).unwrap_or("");
238            let layer = folder
239                .components()
240                .next()
241                .and_then(|c| c.as_os_str().to_str())
242                .unwrap_or("");
243            s.push('\n');
244            s.push_str(&more_footer(self.records.len(), type_, layer));
245        }
246        s
247    }
248
249    /// Store-less layer rollup: counts only, no preview / no derived `updated`
250    /// (a layer index needs each child's on-disk jsonl for those — see
251    /// [`render_layer_md_with_store`], the canonical path every disk write
252    /// uses). This pure-data render is structurally identical sans preview.
253    fn render_layer_md(&self, layer: Layer) -> String {
254        let layer_dir = layer_dir_name(layer);
255        let mut s = String::new();
256        s.push_str("---\n");
257        s.push_str("type: index\n");
258        s.push_str("scope: layer\n");
259        s.push_str(&format!("folder: {layer_dir}\n"));
260        s.push_str("---\n\n");
261        s.push_str(&format!("# {layer_dir}\n\n"));
262        for (tf, n) in &self.child_counts {
263            let tf_unix = path_to_unix(tf);
264            let display = capitalize(folder_basename(tf));
265            s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
266        }
267        s
268    }
269
270    /// Store-less root rollup: counts only (the canonical disk render adds a
271    /// derived `updated` — see [`render_root_md_with_store`]).
272    fn render_root_md(&self) -> String {
273        let mut s = String::new();
274        s.push_str("---\n");
275        s.push_str("type: index\n");
276        s.push_str("scope: root\n");
277        s.push_str("---\n\n");
278        s.push_str(&format!("# {ROOT_TITLE}\n"));
279        for layer in Layer::all() {
280            let layer_dir = layer_dir_name(layer);
281            let prefix = format!("{layer_dir}/");
282            let children: Vec<(&PathBuf, &usize)> = self
283                .child_counts
284                .iter()
285                .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
286                .collect();
287            if children.is_empty() {
288                continue;
289            }
290            let total: usize = children.iter().map(|(_, n)| **n).sum();
291            s.push('\n');
292            s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
293            for (tf, n) in children {
294                let tf_unix = path_to_unix(tf);
295                let display = capitalize(folder_basename(tf));
296                s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
297            }
298        }
299        s
300    }
301}
302
303// ─────────────────────────────────────────────────────────────────────────
// Write-through + sweep (associated functions on `Index`).
305// ─────────────────────────────────────────────────────────────────────────
306
307impl Index {
308    /// **Write-through (loop, O(changed)).** Upsert a new/updated content file.
309    /// Reads the affected type-folder's `index.jsonl` (the sanctioned per-folder
310    /// sidecar read — never a whole-store walk), applies the change, and
311    /// atomically rewrites that folder's `index.md` + `index.jsonl` plus the
312    /// parent layer + root rollups so the artifacts equal a `rebuild_all` over
313    /// the same end state.
314    pub fn on_write(store: &Store, file: &Path) -> crate::Result<()> {
315        let file_rel = normalize_rel(file);
316        let file_abs = store.root.join(&file_rel);
317        let folder = type_folder_of(&file_rel)
318            .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
319        let record = record_from_file(&file_abs, file_rel.clone())?;
320
321        let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
322        records.retain(|r| r.path != record.path);
323        records.push(record);
324        sort_records(&mut records);
325
326        write_type_folder_artifacts(store, &folder, &records)?;
327        update_parents(store, &folder)?;
328        Ok(())
329    }
330
331    /// **Write-through (loop, O(changed)).** Move a file's entry between
332    /// type-folder indexes (or within, if the same folder) in both `index.md`
333    /// and `index.jsonl`, fixing counts on both sides.
334    pub fn on_rename(store: &Store, old: &Path, new: &Path) -> crate::Result<()> {
335        let old_rel = normalize_rel(old);
336        let new_rel = normalize_rel(new);
337        let old_folder = type_folder_of(&old_rel)
338            .ok_or_else(|| bad_index(&old_rel, "source is not inside a layer/type-folder"))?;
339        let new_folder = type_folder_of(&new_rel)
340            .ok_or_else(|| bad_index(&new_rel, "target is not inside a layer/type-folder"))?;
341
342        // Drop from the old folder.
343        let mut old_records =
344            read_jsonl_records(&store.root.join(&old_folder).join("index.jsonl"))?;
345        old_records.retain(|r| r.path != old_rel);
346
347        if old_folder == new_folder {
348            // Same folder: re-read the (now-renamed) file and upsert.
349            let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
350            old_records.retain(|r| r.path != record.path);
351            old_records.push(record);
352            sort_records(&mut old_records);
353            write_type_folder_artifacts(store, &old_folder, &old_records)?;
354            update_parents(store, &old_folder)?;
355            return Ok(());
356        }
357
358        // Cross-folder: write the trimmed old folder (or drop its indexes if
359        // now empty), then upsert into the new folder.
360        sort_records(&mut old_records);
361        write_type_folder_artifacts(store, &old_folder, &old_records)?;
362
363        let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
364        let mut new_records =
365            read_jsonl_records(&store.root.join(&new_folder).join("index.jsonl"))?;
366        new_records.retain(|r| r.path != record.path);
367        new_records.push(record);
368        sort_records(&mut new_records);
369        write_type_folder_artifacts(store, &new_folder, &new_records)?;
370
371        update_parents(store, &old_folder)?;
372        update_parents(store, &new_folder)?;
373        Ok(())
374    }
375
376    /// **Write-through (loop, O(changed)).** Drop a file's entry from both
377    /// `index.md` and `index.jsonl`; decrement counts; if the browse view drops
378    /// below the cap, the next-most-recent is already present in the complete
379    /// jsonl record set and re-renders into the md automatically.
380    pub fn on_remove(store: &Store, file: &Path) -> crate::Result<()> {
381        let file_rel = normalize_rel(file);
382        let folder = type_folder_of(&file_rel)
383            .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
384        let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
385        let before = records.len();
386        records.retain(|r| r.path != file_rel);
387        if records.len() == before {
388            // Nothing to remove; still normalize the folder + parents so the
389            // artifacts stay canonical.
390        }
391        sort_records(&mut records);
392        write_type_folder_artifacts(store, &folder, &records)?;
393        update_parents(store, &folder)?;
394        Ok(())
395    }
396
397    /// **SWEEP repair.** Walk the store once and atomically (re)write root +
398    /// every non-empty layer + every non-empty type-folder `index.md` and
399    /// `index.jsonl` (compacting the jsonl). Also runs [`Index::cleanup`].
400    pub fn rebuild_all(store: &Store) -> crate::Result<()> {
401        Index::cleanup(store)?;
402        for layer in Layer::all() {
403            for tf in type_folders_in_layer(store, layer) {
404                let idx = Index::build_type_folder(store, &tf)?;
405                if idx.records.is_empty() {
406                    continue;
407                }
408                write_type_folder_artifacts(store, &tf, &idx.records)?;
409            }
410            let layer_idx = Index::build_layer(store, layer)?;
411            let layer_index_md = store.root.join(layer_dir_name(layer)).join("index.md");
412            if layer_idx.child_counts.is_empty() {
413                remove_if_exists(&layer_index_md)?;
414            } else {
415                write_atomic(
416                    &layer_index_md,
417                    render_layer_md_with_store(store, &layer_idx),
418                )?;
419            }
420        }
421        let root_idx = Index::build_root(store)?;
422        let root_index_md = store.root.join("index.md");
423        if root_idx.child_counts.is_empty() {
424            remove_if_exists(&root_index_md)?;
425        } else {
426            write_atomic(&root_index_md, render_root_md_with_store(store, &root_idx))?;
427        }
428        Ok(())
429    }
430
431    /// Atomically write a single level's artifact(s) to disk.
432    pub fn write_level(store: &Store, level: &IndexLevel) -> crate::Result<()> {
433        match level {
434            IndexLevel::TypeFolder(folder) => {
435                let idx = Index::build_type_folder(store, folder)?;
436                if idx.records.is_empty() {
437                    remove_if_exists(&store.root.join(folder).join("index.md"))?;
438                    remove_if_exists(&store.root.join(folder).join("index.jsonl"))?;
439                } else {
440                    write_type_folder_artifacts(store, folder, &idx.records)?;
441                }
442            }
443            IndexLevel::Layer(layer) => {
444                let idx = Index::build_layer(store, *layer)?;
445                let p = store.root.join(layer_dir_name(*layer)).join("index.md");
446                if idx.child_counts.is_empty() {
447                    remove_if_exists(&p)?;
448                } else {
449                    write_atomic(&p, render_layer_md_with_store(store, &idx))?;
450                }
451            }
452            IndexLevel::Root => {
453                let idx = Index::build_root(store)?;
454                let p = store.root.join("index.md");
455                if idx.child_counts.is_empty() {
456                    remove_if_exists(&p)?;
457                } else {
458                    write_atomic(&p, render_root_md_with_store(store, &idx))?;
459                }
460            }
461        }
462        Ok(())
463    }
464
465    /// Render the generated indexes to a string with `--- <path> ---`
466    /// separators instead of writing them (`--dry-run`).
467    pub fn render_dry_run(store: &Store, level: &IndexLevel) -> crate::Result<String> {
468        let mut out = String::new();
469        match level {
470            IndexLevel::TypeFolder(folder) => {
471                let idx = Index::build_type_folder(store, folder)?;
472                let md_path = path_to_unix(&folder.join("index.md"));
473                let jsonl_path = path_to_unix(&folder.join("index.jsonl"));
474                out.push_str(&format!("--- {md_path} ---\n"));
475                out.push_str(&idx.to_markdown());
476                out.push_str(&format!("--- {jsonl_path} ---\n"));
477                out.push_str(&idx.to_jsonl());
478            }
479            IndexLevel::Layer(layer) => {
480                let idx = Index::build_layer(store, *layer)?;
481                let md_path = format!("{}/index.md", layer_dir_name(*layer));
482                out.push_str(&format!("--- {md_path} ---\n"));
483                out.push_str(&render_layer_md_with_store(store, &idx));
484            }
485            IndexLevel::Root => {
486                let idx = Index::build_root(store)?;
487                out.push_str("--- index.md ---\n");
488                out.push_str(&render_root_md_with_store(store, &idx));
489            }
490        }
491        Ok(out)
492    }
493
494    /// Cleanup pass (part of [`Index::rebuild_all`]): delete `index.md` /
495    /// `index.jsonl` in non-canonical folders (empty folders, or date-shards
496    /// that should carry none). Symmetric with index creation.
497    pub fn cleanup(store: &Store) -> crate::Result<()> {
498        for layer in Layer::all() {
499            let layer_dir = store.root.join(layer_dir_name(layer));
500            if !layer_dir.is_dir() {
501                continue;
502            }
503            for tf in type_folders_in_layer(store, layer) {
504                let tf_abs = store.root.join(&tf);
505                // Any index inside a shard (below the type-folder root) is
506                // non-canonical: delete it.
507                for entry in walkdir::WalkDir::new(&tf_abs)
508                    .min_depth(1)
509                    .into_iter()
510                    .filter_map(|e| e.ok())
511                {
512                    let p = entry.path();
513                    if is_index_artifact(p) {
514                        remove_if_exists(p)?;
515                    }
516                }
517                // Empty type-folder → no index at its root either.
518                if walk_type_folder_files(&tf_abs).is_empty() {
519                    remove_if_exists(&tf_abs.join("index.md"))?;
520                    remove_if_exists(&tf_abs.join("index.jsonl"))?;
521                }
522            }
523        }
524        Ok(())
525    }
526}
527
528// ─────────────────────────────────────────────────────────────────────────
529// Private free helpers — all self-contained, none call back into Store/parser.
530// ─────────────────────────────────────────────────────────────────────────
531
532/// Write both artifacts for a type-folder, or delete them if the folder is now
533/// empty. The single funnel both write-through and rebuild go through, so their
534/// output is byte-identical by construction.
535fn write_type_folder_artifacts(
536    store: &Store,
537    folder: &Path,
538    records: &[IndexRecord],
539) -> crate::Result<()> {
540    let folder_abs = store.root.join(folder);
541    let md_path = folder_abs.join("index.md");
542    let jsonl_path = folder_abs.join("index.jsonl");
543    if records.is_empty() {
544        remove_if_exists(&md_path)?;
545        remove_if_exists(&jsonl_path)?;
546        return Ok(());
547    }
548    let idx = Index {
549        level: IndexLevel::TypeFolder(folder.to_path_buf()),
550        records: records.to_vec(),
551        child_counts: BTreeMap::new(),
552    };
553    write_atomic(&md_path, idx.to_markdown())?;
554    write_atomic(&jsonl_path, idx.to_jsonl())?;
555    Ok(())
556}
557
558/// Re-render the layer + root rollups that sit above `folder` — the
559/// **loop path**, O(changed). Counts come from the type-folders' on-disk
560/// `index.jsonl` sidecars ([`child_counts_from_jsonl`]), NOT from a content-tree
561/// walk: a single write touches only the affected layer's sidecars (for the
562/// layer rollup) and one sidecar per type-folder (for the root rollup) — never
563/// the millions of files under the shards. `build_layer` / `build_root` (which
564/// *do* walk the content tree) are reserved for the from-scratch sweeps
565/// ([`Index::rebuild_all`], [`Index::write_level`], [`Index::render_dry_run`]).
566/// The result is byte-identical to those builders because in the loop — exactly
567/// as in `rebuild_all` — every touched folder's jsonl is rewritten before its
568/// parents are rolled up, so `jsonl_record_count == walk_type_folder_files.len()`
569/// for every folder read here.
570fn update_parents(store: &Store, folder: &Path) -> crate::Result<()> {
571    let layer = folder
572        .components()
573        .next()
574        .and_then(|c| c.as_os_str().to_str())
575        .and_then(layer_from_dir_name);
576    if let Some(layer) = layer {
577        let idx = Index {
578            level: IndexLevel::Layer(layer),
579            records: Vec::new(),
580            child_counts: child_counts_from_jsonl(store, &[layer])?,
581        };
582        let p = store.root.join(layer_dir_name(layer)).join("index.md");
583        if idx.child_counts.is_empty() {
584            remove_if_exists(&p)?;
585        } else {
586            write_atomic(&p, render_layer_md_with_store(store, &idx))?;
587        }
588    }
589    let root = Index {
590        level: IndexLevel::Root,
591        records: Vec::new(),
592        child_counts: child_counts_from_jsonl(store, &Layer::all())?,
593    };
594    let rp = store.root.join("index.md");
595    if root.child_counts.is_empty() {
596        remove_if_exists(&rp)?;
597    } else {
598        write_atomic(&rp, render_root_md_with_store(store, &root))?;
599    }
600    Ok(())
601}
602
603/// Render a layer `index.md`, reading each child's newest summary + max-updated
604/// straight from its on-disk `index.jsonl` (so the rollup matches the folder
605/// artifacts exactly, write-through and rebuild alike).
606fn render_layer_md_with_store(store: &Store, idx: &Index) -> String {
607    let layer = match idx.level {
608        IndexLevel::Layer(l) => l,
609        _ => unreachable!("render_layer_md_with_store called on non-layer"),
610    };
611    let layer_dir = layer_dir_name(layer);
612    let mut max_upd: Option<DateTime<FixedOffset>> = None;
613    let mut entries = String::new();
614    for (tf, n) in &idx.child_counts {
615        let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
616        let newest = recs.first();
617        if let Some(u) = newest.and_then(|r| r.updated) {
618            max_upd = Some(match max_upd {
619                Some(cur) if cur >= u => cur,
620                _ => u,
621            });
622        }
623        let tf_unix = path_to_unix(tf);
624        let display = capitalize(folder_basename(tf));
625        let preview = newest
626            .map(|r| truncate(&r.summary, 80))
627            .filter(|p| !p.is_empty() && p != MISSING_SUMMARY);
628        match preview {
629            Some(p) => entries.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n}) — {p}\n")),
630            None => entries.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n")),
631        }
632    }
633    let mut s = String::new();
634    s.push_str("---\n");
635    s.push_str("type: index\n");
636    s.push_str("scope: layer\n");
637    s.push_str(&format!("folder: {layer_dir}\n"));
638    if let Some(ts) = max_upd {
639        s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
640    }
641    s.push_str("---\n\n");
642    s.push_str(&format!("# {layer_dir}\n\n"));
643    s.push_str(&entries);
644    s
645}
646
647/// Render the root `index.md`, taking each child's max-updated from its on-disk
648/// `index.jsonl`.
649fn render_root_md_with_store(store: &Store, idx: &Index) -> String {
650    let mut max_upd: Option<DateTime<FixedOffset>> = None;
651    for tf in idx.child_counts.keys() {
652        let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
653        if let Some(u) = recs.first().and_then(|r| r.updated) {
654            max_upd = Some(match max_upd {
655                Some(cur) if cur >= u => cur,
656                _ => u,
657            });
658        }
659    }
660    let mut s = String::new();
661    s.push_str("---\n");
662    s.push_str("type: index\n");
663    s.push_str("scope: root\n");
664    if let Some(ts) = max_upd {
665        s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
666    }
667    s.push_str("---\n\n");
668    s.push_str(&format!("# {ROOT_TITLE}\n"));
669    for layer in Layer::all() {
670        let layer_dir = layer_dir_name(layer);
671        let prefix = format!("{layer_dir}/");
672        let children: Vec<(&PathBuf, &usize)> = idx
673            .child_counts
674            .iter()
675            .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
676            .collect();
677        if children.is_empty() {
678            continue;
679        }
680        let total: usize = children.iter().map(|(_, n)| **n).sum();
681        s.push('\n');
682        s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
683        for (tf, n) in children {
684            let tf_unix = path_to_unix(tf);
685            let display = capitalize(folder_basename(tf));
686            s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
687        }
688    }
689    s
690}
691
692/// One `index.md` browse line: `- [[path]] — summary  ·  #tag #tag` (the
693/// `  ·  #…` suffix omitted when the file has no tags). The wiki-link target is
694/// the canonical **bare** store-relative path (no `.md` extension — the
695/// doctrine the writers emit and `validate` enforces via
696/// `WIKI_LINK_HAS_EXTENSION`); the jsonl `path` keeps the real on-disk name.
697fn format_md_entry(rec: &IndexRecord) -> String {
698    let path = wiki_target(&rec.path);
699    let mut line = format!("- [[{path}]] — {}", rec.summary);
700    if !rec.tags.is_empty() {
701        let tags = rec
702            .tags
703            .iter()
704            .map(|t| format!("#{t}"))
705            .collect::<Vec<_>>()
706            .join(" ");
707        line.push_str(&format!("  ·  {tags}"));
708    }
709    line
710}
711
712/// The deterministic `## More` footer for an over-cap type-folder.
713fn more_footer(total: usize, type_: &str, layer: &str) -> String {
714    format!(
715        "## More\n\nThis folder has {total} files. The {MD_CAP} most recent are listed above.\nUse `dbmd index query --type {type_} --in {layer}` for the complete catalog.\n"
716    )
717}
718
719/// Canonical total order: `updated` descending (None sorts last), ties broken
720/// by store-relative path ascending. A *total* order, so write-through and
721/// rebuild never disagree on #500 vs #501.
722fn sort_records(records: &mut [IndexRecord]) {
723    records.sort_by(|a, b| {
724        match (b.updated, a.updated) {
725            (Some(bu), Some(au)) => bu.cmp(&au),
726            (Some(_), None) => std::cmp::Ordering::Greater, // a is None → after b
727            (None, Some(_)) => std::cmp::Ordering::Less,    // b is None → after a
728            (None, None) => std::cmp::Ordering::Equal,
729        }
730        .then_with(|| a.path.cmp(&b.path))
731    });
732}
733
734impl IndexRecord {
735    /// Build the [`IndexRecord`] a freshly-rebuilt `index.jsonl` *should* hold
736    /// for the file at `abs` (catalogued under store-relative `rel`).
737    ///
738    /// This is the single canonical projection from frontmatter → sidecar
739    /// record: [`Index::build_type_folder`] uses the same path to write the
740    /// jsonl, so the validator can rebuild the expected record here and compare
741    /// it field-for-field against the committed line — covering **every**
742    /// queryable/dedup field the query path reads (`summary`, `type`, `tags`,
743    /// `links`, `created`, `updated`, and every type-specific `fields` entry
744    /// like `email` / `domain` / `company` / `amount` / `vendor`) without the
745    /// validator hand-rolling (and drifting from) the projection per field.
746    pub(crate) fn expected_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
747        record_from_file(abs, rel)
748    }
749}
750
751/// Build an [`IndexRecord`] from a file on disk. Missing `summary` →
752/// [`MISSING_SUMMARY`] placeholder (the index never invents a summary).
753fn record_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
754    let meta = read_frontmatter(abs)?;
755    Ok(IndexRecord {
756        path: rel,
757        type_: meta.type_.unwrap_or_default(),
758        summary: meta.summary.unwrap_or_else(|| MISSING_SUMMARY.to_string()),
759        tags: meta.tags,
760        links: meta.links,
761        created: meta.created,
762        updated: meta.updated,
763        fields: meta.fields,
764    })
765}
766
767/// The slice of a frontmatter this module needs.
768struct FileMeta {
769    type_: Option<String>,
770    summary: Option<String>,
771    tags: Vec<String>,
772    links: Vec<String>,
773    created: Option<DateTime<FixedOffset>>,
774    updated: Option<DateTime<FixedOffset>>,
775    fields: BTreeMap<String, Value>,
776}
777
778/// Minimal frontmatter read: split the leading `---`…`---` block and parse it
779/// as YAML, extracting the typed fields and spilling the rest into `fields`.
780/// Self-contained (does not route through the `parser` module).
781fn read_frontmatter(abs: &Path) -> crate::Result<FileMeta> {
782    let text = fs::read_to_string(abs)?;
783    let yaml = extract_frontmatter_block(&text).unwrap_or_default();
784    let map: serde_yml::Mapping = if yaml.trim().is_empty() {
785        serde_yml::Mapping::new()
786    } else {
787        serde_yml::from_str(&yaml).map_err(|e| {
788            crate::Error::Store(crate::store::StoreError::BadTypeIndex {
789                path: abs.to_path_buf(),
790                message: format!("frontmatter YAML: {e}"),
791            })
792        })?
793    };
794
795    let mut type_ = None;
796    let mut summary = None;
797    let mut tags = Vec::new();
798    let mut links = Vec::new();
799    let mut created = None;
800    let mut updated = None;
801    let mut fields = BTreeMap::new();
802
803    for (k, v) in map {
804        let key = match k.as_str() {
805            Some(s) => s.to_string(),
806            None => continue,
807        };
808        match key.as_str() {
809            "type" => type_ = v.as_str().map(str::to_string),
810            "summary" => summary = v.as_str().map(str::to_string),
811            "tags" => tags = yaml_string_list(&v),
812            "links" => links = yaml_string_list(&v),
813            "created" => created = v.as_str().and_then(parse_ts),
814            "updated" => updated = v.as_str().and_then(parse_ts),
815            // `path`, `type`, `summary`, `tags`, `links`, `created`, `updated`
816            // are the reserved IndexRecord keys; everything else (including
817            // `id`, `status`, type-specific fields) goes to `fields`.
818            "path" => {}
819            _ => {
820                if let Ok(jv) = serde_json::to_value(&v) {
821                    fields.insert(key, jv);
822                }
823            }
824        }
825    }
826
827    Ok(FileMeta {
828        type_,
829        summary,
830        tags,
831        links,
832        created,
833        updated,
834        fields,
835    })
836}
837
/// Return the text between a leading `---` fence and the next `---` fence.
///
/// A leading UTF-8 byte-order mark is ignored. A fence is a line that equals
/// `---` once trailing whitespace is removed, so an indented `---` is ordinary
/// content. Each returned line is terminated with `\n` (CRLF input is thereby
/// normalized). Returns `None` when the first line is not a fence or when no
/// closing fence follows.
fn extract_frontmatter_block(text: &str) -> Option<String> {
    let is_fence = |line: &str| line.trim_end() == "---";

    let body = text.strip_prefix('\u{feff}').unwrap_or(text);
    let mut rest = body.lines();
    if !is_fence(rest.next()?) {
        return None;
    }

    let mut yaml = String::new();
    loop {
        // Running out of lines before a closing fence means no frontmatter.
        let line = rest.next()?;
        if is_fence(line) {
            return Some(yaml);
        }
        yaml.push_str(line);
        yaml.push('\n');
    }
}
857
858/// Read a string scalar or a sequence-of-string-scalars into a `Vec<String>`.
859/// Wiki-link items keep their `[[…]]` form verbatim.
860fn yaml_string_list(v: &serde_yml::Value) -> Vec<String> {
861    match v {
862        serde_yml::Value::String(s) => vec![s.clone()],
863        serde_yml::Value::Sequence(seq) => seq
864            .iter()
865            .filter_map(|item| item.as_str().map(str::to_string))
866            .collect(),
867        _ => Vec::new(),
868    }
869}
870
871/// Parse an RFC3339 timestamp scalar.
872fn parse_ts(s: &str) -> Option<DateTime<FixedOffset>> {
873    DateTime::parse_from_rfc3339(s.trim()).ok()
874}
875
876/// Render a timestamp the same way `serde_json` renders an `IndexRecord`
877/// timestamp (RFC3339, `Z` for UTC, sub-seconds preserved) so the md
878/// frontmatter and the jsonl agree byte-for-byte.
879fn fmt_ts(ts: &DateTime<FixedOffset>) -> String {
880    ts.to_rfc3339_opts(SecondsFormat::AutoSi, true)
881}
882
883/// Max `updated` over an iterator of optional timestamps.
884fn max_updated<'a>(
885    it: impl Iterator<Item = Option<&'a DateTime<FixedOffset>>>,
886) -> Option<DateTime<FixedOffset>> {
887    let mut best: Option<DateTime<FixedOffset>> = None;
888    for ts in it.flatten() {
889        best = Some(match best {
890            Some(cur) if cur >= *ts => cur,
891            _ => *ts,
892        });
893    }
894    best
895}
896
897/// Read a type-folder's `index.jsonl` into records, applying last-write-wins by
898/// `path` over any un-compacted lines (so a half-compacted jsonl still reads
899/// cleanly). Missing file → empty set. Returns records in canonical order.
900fn read_jsonl_records(jsonl: &Path) -> crate::Result<Vec<IndexRecord>> {
901    let text = match fs::read_to_string(jsonl) {
902        Ok(t) => t,
903        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
904        Err(e) => return Err(e.into()),
905    };
906    // Last-write-wins by path; preserve only the final occurrence.
907    let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
908    for (i, line) in text.lines().enumerate() {
909        if line.trim().is_empty() {
910            continue;
911        }
912        let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
913            crate::Error::Store(crate::store::StoreError::BadTypeIndex {
914                path: jsonl.to_path_buf(),
915                message: format!("line {}: {e}", i + 1),
916            })
917        })?;
918        by_path.insert(rec.path.clone(), rec);
919    }
920    let mut records: Vec<IndexRecord> = by_path.into_values().collect();
921    sort_records(&mut records);
922    Ok(records)
923}
924
925/// Count the distinct content files a type-folder's `index.jsonl` catalogs —
926/// the **loop-path** count primitive, the rollup analogue of reading the
927/// per-folder sidecar. It reads only the one small sidecar (one line per file),
928/// never the content tree, so a rollup recompute over `K` type-folders is
929/// `O(K · folder)` sidecar reads — never `O(store files)` like
930/// [`walk_type_folder_files`]. Distinct-`path` (last-write-wins) so the count is
931/// byte-identical to [`read_jsonl_records`]`.len()` even on a half-compacted
932/// jsonl; a missing sidecar is `0`. Within the loop and within
933/// [`Index::rebuild_all`] the folder's jsonl is always rewritten before its
934/// parents are rolled up, so this equals `walk_type_folder_files(folder).len()`.
935fn jsonl_record_count(jsonl: &Path) -> crate::Result<usize> {
936    let text = match fs::read_to_string(jsonl) {
937        Ok(t) => t,
938        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(0),
939        Err(e) => return Err(e.into()),
940    };
941    let mut paths: BTreeSet<PathBuf> = BTreeSet::new();
942    for (i, line) in text.lines().enumerate() {
943        if line.trim().is_empty() {
944            continue;
945        }
946        let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
947            crate::Error::Store(crate::store::StoreError::BadTypeIndex {
948                path: jsonl.to_path_buf(),
949                message: format!("line {}: {e}", i + 1),
950            })
951        })?;
952        paths.insert(rec.path);
953    }
954    Ok(paths.len())
955}
956
957/// Per-child rollup counts for `layers`, read from each type-folder's on-disk
958/// `index.jsonl` (via [`jsonl_record_count`]) rather than walked from the
959/// content tree. The **loop-path** counterpart to the from-scratch counting in
960/// [`Index::build_layer`] / [`Index::build_root`]: it keeps [`update_parents`]
961/// `O(type-folders)` so a single write never re-enumerates the whole store.
962fn child_counts_from_jsonl(
963    store: &Store,
964    layers: &[Layer],
965) -> crate::Result<BTreeMap<PathBuf, usize>> {
966    let mut child_counts = BTreeMap::new();
967    for &layer in layers {
968        for tf in type_folders_in_layer(store, layer) {
969            let n = jsonl_record_count(&store.root.join(&tf).join("index.jsonl"))?;
970            if n > 0 {
971                child_counts.insert(tf, n);
972            }
973        }
974    }
975    Ok(child_counts)
976}
977
978/// Walk a type-folder's `.md` content files, recursing through date-shards,
979/// excluding the `index.md` artifact itself and any hidden entries.
980fn walk_type_folder_files(folder_abs: &Path) -> Vec<PathBuf> {
981    let mut out = Vec::new();
982    if !folder_abs.is_dir() {
983        return out;
984    }
985    for entry in walkdir::WalkDir::new(folder_abs)
986        .into_iter()
987        .filter_entry(|e| !is_hidden(e.file_name()))
988        .filter_map(|e| e.ok())
989    {
990        if !entry.file_type().is_file() {
991            continue;
992        }
993        let p = entry.path();
994        if p.extension().and_then(|e| e.to_str()) != Some("md") {
995            continue;
996        }
997        if p.file_name().and_then(|n| n.to_str()) == Some("index.md") {
998            continue;
999        }
1000        out.push(p.to_path_buf());
1001    }
1002    out
1003}
1004
1005/// The immediate type-folders under a layer (one directory level below the
1006/// layer dir), as store-relative paths. Hidden dirs and `log/` are skipped.
1007fn type_folders_in_layer(store: &Store, layer: Layer) -> Vec<PathBuf> {
1008    let layer_dir = store.root.join(layer_dir_name(layer));
1009    let mut out = Vec::new();
1010    let rd = match fs::read_dir(&layer_dir) {
1011        Ok(rd) => rd,
1012        Err(_) => return out,
1013    };
1014    for entry in rd.flatten() {
1015        if !entry.path().is_dir() {
1016            continue;
1017        }
1018        let name = entry.file_name();
1019        let name = match name.to_str() {
1020            Some(n) => n,
1021            None => continue,
1022        };
1023        if is_hidden(entry.file_name().as_os_str()) || name == "log" {
1024            continue;
1025        }
1026        out.push(PathBuf::from(layer_dir_name(layer)).join(name));
1027    }
1028    out.sort();
1029    out
1030}
1031
1032/// The type-folder a content file belongs to: `<layer>/<type>` (the first two
1033/// path components), or `None` if the path is not under a known layer with at
1034/// least a type segment.
1035fn type_folder_of(file_rel: &Path) -> Option<PathBuf> {
1036    let mut comps = file_rel.components();
1037    let layer = comps.next()?.as_os_str().to_str()?;
1038    layer_from_dir_name(layer)?;
1039    let type_seg = comps.next()?.as_os_str().to_str()?;
1040    Some(PathBuf::from(layer).join(type_seg))
1041}
1042
/// Express `abs` relative to `root`. Returns `None` when `abs` does not start
/// with `root`; when the two are equal the result is the empty path.
fn rel_to_store(root: &Path, abs: &Path) -> Option<PathBuf> {
    match abs.strip_prefix(root) {
        Ok(rel) => Some(rel.to_path_buf()),
        Err(_) => None,
    }
}
1047
1048/// Normalize a possibly-absolute or `./`-prefixed path to a clean
1049/// store-relative form (drops a leading `./`; leaves already-relative paths).
1050fn normalize_rel(p: &Path) -> PathBuf {
1051    let s = path_to_unix(p);
1052    let s = s.strip_prefix("./").unwrap_or(&s);
1053    PathBuf::from(s)
1054}
1055
/// Whether the final component of `p` is exactly `index.md` or `index.jsonl`
/// (case-sensitive). Paths with no file name, or a non-UTF-8 one, are not
/// artifacts.
fn is_index_artifact(p: &Path) -> bool {
    match p.file_name().and_then(|name| name.to_str()) {
        Some(name) => name == "index.md" || name == "index.jsonl",
        None => false,
    }
}
1062
/// Whether `name` begins with `.`. A name that is not valid UTF-8 is treated
/// as not hidden.
fn is_hidden(name: &std::ffi::OsStr) -> bool {
    matches!(name.to_str(), Some(text) if text.starts_with('.'))
}
1066
1067fn layer_dir_name(layer: Layer) -> &'static str {
1068    match layer {
1069        Layer::Sources => "sources",
1070        Layer::Records => "records",
1071        Layer::Wiki => "wiki",
1072    }
1073}
1074
1075/// Local layer-name parse. Mirrors the contract of [`Layer::from_dir_name`];
1076/// kept local to keep this module's walk self-contained (see the module header).
1077fn layer_from_dir_name(name: &str) -> Option<Layer> {
1078    match name {
1079        "sources" => Some(Layer::Sources),
1080        "records" => Some(Layer::Records),
1081        "wiki" => Some(Layer::Wiki),
1082        _ => None,
1083    }
1084}
1085
/// The last component of `p` as a `&str`, or `""` when the path has no file
/// name (empty path, `..`, a bare root) or the name is not valid UTF-8.
fn folder_basename(p: &Path) -> &str {
    match p.file_name() {
        Some(name) => name.to_str().unwrap_or(""),
        None => "",
    }
}
1090
1091/// The canonical wiki-link target for a content path: the store-relative path
1092/// with `/` separators and the trailing `.md` stripped (the bare form the
1093/// `index.md` browse view links to).
1094fn wiki_target(p: &Path) -> String {
1095    let unix = path_to_unix(p);
1096    unix.strip_suffix(".md").unwrap_or(&unix).to_string()
1097}
1098
/// Render a path with `/` separators regardless of host OS, so artifacts are
/// identical on every platform.
///
/// Components are joined with a single `/`. The root component is rendered
/// as one `/` and never followed by a second separator, so an absolute path
/// such as `/a/b` renders as `/a/b` rather than `//a/b`. Relative paths
/// render exactly as their components joined by `/`. Components that are not
/// valid UTF-8 are omitted.
fn path_to_unix(p: &Path) -> String {
    let mut out = String::new();
    for comp in p.components() {
        match comp {
            // The root is itself a separator; joining it like an ordinary
            // segment is what would produce the doubled slash.
            std::path::Component::RootDir => out.push('/'),
            other => {
                if let Some(segment) = other.as_os_str().to_str() {
                    if !out.is_empty() && !out.ends_with('/') {
                        out.push('/');
                    }
                    out.push_str(segment);
                }
            }
        }
    }
    out
}
1107
/// Uppercase the first character of `s` and leave the rest untouched.
///
/// The first character goes through `char::to_uppercase`, so the mapping is
/// Unicode-aware and may expand to several characters (e.g. `ß` → `SS`). An
/// empty input yields an empty string.
fn capitalize(s: &str) -> String {
    let mut rest = s.chars();
    if let Some(head) = rest.next() {
        let mut out = String::with_capacity(s.len());
        out.extend(head.to_uppercase());
        out.push_str(rest.as_str());
        out
    } else {
        String::new()
    }
}
1116
/// Collapse `s` onto a single line and keep at most `max` characters.
///
/// Every run of whitespace (including newlines) becomes one space and
/// leading/trailing whitespace is dropped. The cut falls on a character
/// boundary, so multi-byte characters are never split.
fn truncate(s: &str, max: usize) -> String {
    let mut flat = s.split_whitespace().collect::<Vec<_>>().join(" ");
    // The byte offset of the character at index `max` (if any) is the cut point.
    if let Some((cut, _)) = flat.char_indices().nth(max) {
        flat.truncate(cut);
    }
    flat
}
1126
1127fn write_atomic(path: &Path, contents: String) -> crate::Result<()> {
1128    if let Some(parent) = path.parent() {
1129        fs::create_dir_all(parent)?;
1130    }
1131    let dir = path.parent().unwrap_or_else(|| Path::new("."));
1132    let mut tmp = tempfile_in(dir)?;
1133    tmp.write_all(contents.as_bytes())?;
1134    tmp.flush()?;
1135    tmp.persist(path)?;
1136    Ok(())
1137}
1138
1139fn remove_if_exists(path: &Path) -> crate::Result<()> {
1140    match fs::remove_file(path) {
1141        Ok(()) => Ok(()),
1142        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
1143        Err(e) => Err(e.into()),
1144    }
1145}
1146
1147fn bad_index(path: &Path, msg: &str) -> crate::Error {
1148    crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1149        path: path.to_path_buf(),
1150        message: msg.to_string(),
1151    })
1152}
1153
// Minimal temp-file-then-rename helper for atomic writes. `tempfile` is only a
// dev-dependency (used by the tests), so the library path rolls its own rather
// than adding it to the non-dev dependency set. The handle lives in an
// `Option` so that `persist` can take it out and close it before renaming,
// without conflicting with the `Drop` impl, which only removes a temp file
// that was never persisted.
struct AtomicTemp {
    // Open handle to the temp file; `None` once `persist` has taken it.
    file: Option<fs::File>,
    // Location of the temp file on disk.
    path: PathBuf,
    // Set after a successful rename so `Drop` leaves the file alone.
    persisted: bool,
}
1164
1165impl AtomicTemp {
1166    fn write_all(&mut self, bytes: &[u8]) -> std::io::Result<()> {
1167        self.file.as_mut().expect("temp file open").write_all(bytes)
1168    }
1169    fn flush(&mut self) -> std::io::Result<()> {
1170        self.file.as_mut().expect("temp file open").flush()
1171    }
1172    fn persist(mut self, dest: &Path) -> std::io::Result<()> {
1173        if let Some(f) = self.file.take() {
1174            f.sync_all().ok();
1175            // `f` dropped here, closing the handle before the rename.
1176        }
1177        fs::rename(&self.path, dest)?;
1178        self.persisted = true;
1179        Ok(())
1180    }
1181}
1182
1183impl Drop for AtomicTemp {
1184    fn drop(&mut self) {
1185        // Best-effort cleanup if not persisted (an error path bailed out).
1186        if !self.persisted {
1187            let _ = fs::remove_file(&self.path);
1188        }
1189    }
1190}
1191
1192fn tempfile_in(dir: &Path) -> std::io::Result<AtomicTemp> {
1193    use std::time::{SystemTime, UNIX_EPOCH};
1194    let nanos = SystemTime::now()
1195        .duration_since(UNIX_EPOCH)
1196        .map(|d| d.as_nanos())
1197        .unwrap_or(0);
1198    let pid = std::process::id();
1199    // Monotonic-ish unique suffix; the dir is the destination dir so rename is
1200    // same-filesystem and therefore atomic.
1201    let counter = next_temp_counter();
1202    let name = format!(".dbmd-index-{pid}-{nanos}-{counter}.tmp");
1203    let path = dir.join(name);
1204    let file = fs::OpenOptions::new()
1205        .write(true)
1206        .create_new(true)
1207        .open(&path)?;
1208    Ok(AtomicTemp {
1209        file: Some(file),
1210        path,
1211        persisted: false,
1212    })
1213}
1214
/// Return the next value of a process-wide counter that starts at 0 and is
/// incremented by one on every call.
fn next_temp_counter() -> u64 {
    static NEXT: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
    NEXT.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
}
1220
1221#[cfg(test)]
1222mod tests {
1223    use super::*;
1224    use std::collections::BTreeSet;
1225    use std::fs;
1226    use tempfile::TempDir;
1227
1228    // ── fixtures ─────────────────────────────────────────────────────────
1229
1230    /// A temp store with a `DB.md` marker. `store.config` is the parser default
1231    /// (these tests never exercise the config parser).
1232    fn mk_store() -> (TempDir, Store) {
1233        let dir = TempDir::new().unwrap();
1234        fs::write(dir.path().join("DB.md"), "# test store\n").unwrap();
1235        let store = Store {
1236            root: dir.path().to_path_buf(),
1237            config: crate::parser::Config::default(),
1238        };
1239        (dir, store)
1240    }
1241
1242    /// Write a content file at `rel` with the given frontmatter lines + body.
1243    /// `fm` is the raw YAML body between the fences (no `---`).
1244    fn write_raw(store: &Store, rel: &str, fm: &str, body: &str) {
1245        let abs = store.root.join(rel);
1246        fs::create_dir_all(abs.parent().unwrap()).unwrap();
1247        fs::write(&abs, format!("---\n{fm}\n---\n{body}")).unwrap();
1248    }
1249
1250    /// Convenience: write a typed content file with summary/updated/extras.
1251    fn write_doc(
1252        store: &Store,
1253        rel: &str,
1254        type_: &str,
1255        summary: Option<&str>,
1256        updated: Option<&str>,
1257        extra_yaml: &str,
1258    ) {
1259        let mut fm = format!("type: {type_}\n");
1260        if let Some(s) = summary {
1261            fm.push_str(&format!("summary: {s}\n"));
1262        }
1263        if let Some(u) = updated {
1264            fm.push_str(&format!("updated: {u}\n"));
1265        }
1266        fm.push_str(extra_yaml);
1267        write_raw(store, rel, fm.trim_end(), "\nbody text\n");
1268    }
1269
1270    fn read(store: &Store, rel: &str) -> String {
1271        fs::read_to_string(store.root.join(rel)).unwrap()
1272    }
1273
1274    fn exists(store: &Store, rel: &str) -> bool {
1275        store.root.join(rel).exists()
1276    }
1277
1278    /// Collect every `index.md` + `index.jsonl` under the store, mapped to its
1279    /// bytes — the surface the byte-identity invariant compares.
1280    fn snapshot_artifacts(store: &Store) -> BTreeMap<String, String> {
1281        let mut out = BTreeMap::new();
1282        for entry in walkdir::WalkDir::new(&store.root)
1283            .into_iter()
1284            .filter_map(|e| e.ok())
1285        {
1286            let p = entry.path();
1287            if is_index_artifact(p) {
1288                let rel = path_to_unix(&rel_to_store(&store.root, p).unwrap());
1289                out.insert(rel, fs::read_to_string(p).unwrap());
1290            }
1291        }
1292        out
1293    }
1294
1295    // ── build_type_folder + to_markdown ──────────────────────────────────
1296
1297    #[test]
1298    fn type_folder_aggregates_across_shards_in_recency_order() {
1299        let (_d, store) = mk_store();
1300        // Three emails across two month-shards, deliberately written
1301        // out-of-recency-order on disk.
1302        write_doc(
1303            &store,
1304            "sources/emails/2026/05/b-old.md",
1305            "email",
1306            Some("Older mail"),
1307            Some("2026-05-01T09:00:00Z"),
1308            "",
1309        );
1310        write_doc(
1311            &store,
1312            "sources/emails/2026/06/c-new.md",
1313            "email",
1314            Some("Newest mail"),
1315            Some("2026-06-15T12:00:00Z"),
1316            "",
1317        );
1318        write_doc(
1319            &store,
1320            "sources/emails/2026/05/a-mid.md",
1321            "email",
1322            Some("Middle mail"),
1323            Some("2026-05-20T08:00:00Z"),
1324            "",
1325        );
1326
1327        let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
1328        let paths: Vec<String> = idx.records.iter().map(|r| path_to_unix(&r.path)).collect();
1329        assert_eq!(
1330            paths,
1331            vec![
1332                "sources/emails/2026/06/c-new.md",
1333                "sources/emails/2026/05/a-mid.md",
1334                "sources/emails/2026/05/b-old.md",
1335            ],
1336            "records must aggregate across shards, newest `updated` first"
1337        );
1338    }
1339
1340    #[test]
1341    fn type_folder_md_format_entries_tags_and_derived_updated() {
1342        let (_d, store) = mk_store();
1343        write_doc(
1344            &store,
1345            "records/contacts/sarah-chen.md",
1346            "contact",
1347            Some("Renewal champion at Acme"),
1348            Some("2026-05-27T10:00:00Z"),
1349            "tags:\n  - renewal\n  - acme\n",
1350        );
1351        write_doc(
1352            &store,
1353            "records/contacts/no-tags.md",
1354            "contact",
1355            Some("Plain contact"),
1356            Some("2026-05-26T10:00:00Z"),
1357            "",
1358        );
1359
1360        let idx = Index::build_type_folder(&store, Path::new("records/contacts")).unwrap();
1361        let md = idx.to_markdown();
1362
1363        // Frontmatter is exact and the index's own `updated` is the MAX member
1364        // updated (the determinism the byte-identity invariant rests on).
1365        assert!(md.starts_with(
1366            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\nupdated: 2026-05-27T10:00:00Z\n---\n\n# records/contacts\n"
1367        ), "frontmatter/heading wrong:\n{md}");
1368
1369        // Entry with tags: `— summary  ·  #tag #tag`.
1370        assert!(
1371            md.contains(
1372                "- [[records/contacts/sarah-chen]] — Renewal champion at Acme  ·  #renewal #acme\n"
1373            ),
1374            "tagged entry wrong:\n{md}"
1375        );
1376        // Entry without tags omits the `  ·  ` suffix entirely.
1377        assert!(
1378            md.contains("- [[records/contacts/no-tags]] — Plain contact\n"),
1379            "untagged entry wrong:\n{md}"
1380        );
1381        assert!(
1382            !md.contains("Plain contact  ·"),
1383            "untagged entry must not emit a tag separator"
1384        );
1385        // No `## More` below the cap.
1386        assert!(!md.contains("## More"), "no footer expected under the cap");
1387    }
1388
1389    #[test]
1390    fn missing_summary_becomes_placeholder_not_invented() {
1391        let (_d, store) = mk_store();
1392        write_doc(
1393            &store,
1394            "records/notes/x.md",
1395            "note",
1396            None,
1397            Some("2026-05-27T10:00:00Z"),
1398            "",
1399        );
1400        let idx = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
1401        assert_eq!(idx.records[0].summary, MISSING_SUMMARY);
1402        let md = idx.to_markdown();
1403        assert!(
1404            md.contains("- [[records/notes/x]] — (no summary)\n"),
1405            "missing summary must render the placeholder, not invent text:\n{md}"
1406        );
1407    }
1408
1409    // ── to_jsonl ─────────────────────────────────────────────────────────
1410
1411    #[test]
1412    fn jsonl_is_complete_structured_and_round_trips() {
1413        let (_d, store) = mk_store();
1414        write_doc(
1415            &store,
1416            "records/expenses/2026/05/e1.md",
1417            "expense",
1418            Some("Lunch with vendor"),
1419            Some("2026-05-10T10:00:00Z"),
1420            "created: 2026-05-10T09:00:00Z\nstatus: paid\namount: 42\ntags:\n  - food\nlinks:\n  - wiki/themes/spend\n",
1421        );
1422        write_doc(
1423            &store,
1424            "records/expenses/2026/06/e2.md",
1425            "expense",
1426            Some("Cloud bill"),
1427            Some("2026-06-01T10:00:00Z"),
1428            "amount: 100\n",
1429        );
1430
1431        let idx = Index::build_type_folder(&store, Path::new("records/expenses")).unwrap();
1432        let jsonl = idx.to_jsonl();
1433        let lines: Vec<&str> = jsonl.lines().collect();
1434        assert_eq!(lines.len(), 2, "one JSON object per file, uncapped");
1435
1436        // Newest first (e2), and each line parses back to an equal record.
1437        let r0: IndexRecord = serde_json::from_str(lines[0]).unwrap();
1438        assert_eq!(path_to_unix(&r0.path), "records/expenses/2026/06/e2.md");
1439        assert_eq!(
1440            r0, idx.records[0],
1441            "jsonl line must round-trip to the record"
1442        );
1443
1444        // The first (data) record carries every reserved field + the extras in
1445        // `fields` (status/amount), and links/tags verbatim.
1446        let r1: IndexRecord = serde_json::from_str(lines[1]).unwrap();
1447        assert_eq!(r1.type_, "expense");
1448        assert_eq!(r1.summary, "Lunch with vendor");
1449        assert_eq!(r1.tags, vec!["food".to_string()]);
1450        assert_eq!(r1.links, vec!["wiki/themes/spend".to_string()]);
1451        assert_eq!(
1452            r1.created,
1453            Some(DateTime::parse_from_rfc3339("2026-05-10T09:00:00Z").unwrap())
1454        );
1455        assert_eq!(r1.fields.get("status"), Some(&Value::from("paid")));
1456        assert_eq!(r1.fields.get("amount"), Some(&Value::from(42)));
1457        // Reserved keys never leak into `fields`.
1458        for reserved in [
1459            "path", "type", "summary", "tags", "links", "created", "updated",
1460        ] {
1461            assert!(
1462                !r1.fields.contains_key(reserved),
1463                "reserved key {reserved} must not appear in fields"
1464            );
1465        }
1466
1467        // Stable key order: declared fields first, then sorted extras.
1468        assert!(
1469            lines[1].starts_with(
1470                r#"{"path":"records/expenses/2026/05/e1.md","type":"expense","summary":"Lunch with vendor","tags":["food"],"links":["wiki/themes/spend"],"created":"2026-05-10T09:00:00Z","updated":"2026-05-10T10:00:00Z","#
1471            ),
1472            "jsonl key order not stable:\n{}",
1473            lines[1]
1474        );
1475        // The flattened extras come in BTreeMap (sorted) order: amount < status.
1476        assert!(
1477            lines[1].ends_with(r#""amount":42,"status":"paid"}"#),
1478            "extras must be sorted:\n{}",
1479            lines[1]
1480        );
1481    }
1482
1483    // ── cap + footer ─────────────────────────────────────────────────────
1484
1485    #[test]
1486    fn over_cap_md_shows_500_plus_footer_jsonl_holds_all() {
1487        let (_d, store) = mk_store();
1488        let total = MD_CAP + 7;
1489        for i in 0..total {
            // Distinct `updated` per file: the (day, second) pair is unique for any
            // `i` below 540, so the recency order is total (though not monotonic in `i`).
1491            let day = 1 + (i % 27);
1492            let rel = format!("sources/emails/2026/05/m-{i:04}.md");
1493            let updated = format!("2026-05-{day:02}T00:00:{:02}Z", i % 60);
1494            write_doc(
1495                &store,
1496                &rel,
1497                "email",
1498                Some(&format!("mail {i}")),
1499                Some(&updated),
1500                "",
1501            );
1502        }
1503        let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
1504        assert_eq!(idx.records.len(), total, "jsonl/records keep every file");
1505
1506        let md = idx.to_markdown();
1507        let entry_lines = md.lines().filter(|l| l.starts_with("- [[")).count();
1508        assert_eq!(entry_lines, MD_CAP, "md browse view is capped at 500");
1509
1510        assert!(
1511            md.contains("## More\n\n"),
1512            "over-cap md needs a More footer"
1513        );
1514        assert!(
1515            md.contains(&format!(
1516                "This folder has {total} files. The 500 most recent are listed above.\n"
1517            )),
1518            "footer count wrong:\n{md}"
1519        );
1520        assert!(
1521            md.contains(
1522                "Use `dbmd index query --type email --in sources` for the complete catalog.\n"
1523            ),
1524            "footer must infer type=email layer=sources:\n{md}"
1525        );
1526
1527        let jsonl = idx.to_jsonl();
1528        assert_eq!(jsonl.lines().count(), total, "jsonl is uncapped");
1529    }
1530
1531    // ── sort total order ─────────────────────────────────────────────────
1532
1533    #[test]
1534    fn sort_breaks_ties_by_path_and_puts_undated_last() {
1535        let mut recs = vec![
1536            rec("z/a.md", Some("2026-05-01T00:00:00Z")),
1537            rec("a/b.md", Some("2026-05-01T00:00:00Z")), // same updated, path < z/a
1538            rec("m/c.md", None),                         // undated → last
1539            rec("b/d.md", Some("2026-06-01T00:00:00Z")), // newest
1540        ];
1541        sort_records(&mut recs);
1542        let order: Vec<String> = recs.iter().map(|r| path_to_unix(&r.path)).collect();
1543        assert_eq!(order, vec!["b/d.md", "a/b.md", "z/a.md", "m/c.md"]);
1544    }
1545
1546    fn rec(path: &str, updated: Option<&str>) -> IndexRecord {
1547        IndexRecord {
1548            path: PathBuf::from(path),
1549            type_: "t".into(),
1550            summary: "s".into(),
1551            tags: vec![],
1552            links: vec![],
1553            created: None,
1554            updated: updated.map(|u| DateTime::parse_from_rfc3339(u).unwrap()),
1555            fields: BTreeMap::new(),
1556        }
1557    }
1558
1559    // ── build_layer / build_root ─────────────────────────────────────────
1560
1561    #[test]
1562    fn layer_index_lists_type_folders_with_counts_and_preview() {
1563        let (_d, store) = mk_store();
1564        write_doc(
1565            &store,
1566            "records/contacts/a.md",
1567            "contact",
1568            Some("Contact A older"),
1569            Some("2026-05-01T00:00:00Z"),
1570            "",
1571        );
1572        write_doc(
1573            &store,
1574            "records/contacts/b.md",
1575            "contact",
1576            Some("Contact B newest"),
1577            Some("2026-05-09T00:00:00Z"),
1578            "",
1579        );
1580        write_doc(
1581            &store,
1582            "records/companies/x.md",
1583            "company",
1584            Some("Acme Inc"),
1585            Some("2026-05-05T00:00:00Z"),
1586            "",
1587        );
1588        // build the type-folder artifacts first (layer preview reads their jsonl)
1589        Index::write_level(&store, &IndexLevel::TypeFolder("records/contacts".into())).unwrap();
1590        Index::write_level(&store, &IndexLevel::TypeFolder("records/companies".into())).unwrap();
1591
1592        Index::write_level(&store, &IndexLevel::Layer(Layer::Records)).unwrap();
1593        let md = read(&store, "records/index.md");
1594
1595        assert!(
1596            md.starts_with("---\ntype: index\nscope: layer\nfolder: records\n"),
1597            "layer fm:\n{md}"
1598        );
1599        // Alphabetical type-folder order: companies before contacts.
1600        let companies_at = md.find("companies/index").unwrap();
1601        let contacts_at = md.find("contacts/index").unwrap();
1602        assert!(
1603            companies_at < contacts_at,
1604            "type folders must be alphabetical"
1605        );
1606        // Count + display + newest-summary preview.
1607        assert!(
1608            md.contains("- [[records/contacts/index|Contacts]] (2) — Contact B newest\n"),
1609            "contacts entry:\n{md}"
1610        );
1611        assert!(
1612            md.contains("- [[records/companies/index|Companies]] (1) — Acme Inc\n"),
1613            "companies entry:\n{md}"
1614        );
1615        // Layer `updated` is the max across children (contacts b = 05-09).
1616        assert!(
1617            md.contains("updated: 2026-05-09T00:00:00Z\n"),
1618            "layer updated must be max child:\n{md}"
1619        );
1620    }
1621
1622    #[test]
1623    fn root_index_groups_layers_with_totals_and_per_type_counts() {
1624        let (_d, store) = mk_store();
1625        write_doc(
1626            &store,
1627            "sources/emails/2026/05/a.md",
1628            "email",
1629            Some("Mail"),
1630            Some("2026-05-01T00:00:00Z"),
1631            "",
1632        );
1633        write_doc(
1634            &store,
1635            "sources/docs/d.md",
1636            "doc",
1637            Some("Doc"),
1638            Some("2026-05-02T00:00:00Z"),
1639            "",
1640        );
1641        write_doc(
1642            &store,
1643            "records/contacts/c.md",
1644            "contact",
1645            Some("C"),
1646            Some("2026-05-03T00:00:00Z"),
1647            "",
1648        );
1649        // wiki empty → no Wiki section
1650
1651        Index::rebuild_all(&store).unwrap();
1652        let md = read(&store, "index.md");
1653
1654        assert!(
1655            md.starts_with("---\ntype: index\nscope: root\n"),
1656            "root fm:\n{md}"
1657        );
1658        assert!(md.contains("# Knowledge base index\n"), "root title:\n{md}");
1659        // Layer heading with total count; Sources before Records (canonical).
1660        let sources_h = md
1661            .find("## Sources (2)")
1662            .expect("sources heading w/ total 2");
1663        let records_h = md
1664            .find("## Records (1)")
1665            .expect("records heading w/ total 1");
1666        assert!(sources_h < records_h, "Sources must precede Records");
1667        assert!(!md.contains("## Wiki"), "empty layer gets no section");
1668        // Per-type sub-entries with (N), no preview at root.
1669        assert!(
1670            md.contains("- [[sources/docs/index|Docs]] (1)\n"),
1671            "root docs entry:\n{md}"
1672        );
1673        assert!(
1674            md.contains("- [[sources/emails/index|Emails]] (1)\n"),
1675            "root emails entry:\n{md}"
1676        );
1677        assert!(
1678            md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
1679            "root contacts entry:\n{md}"
1680        );
1681        assert!(!md.contains("— "), "root entries carry no preview text");
1682    }
1683
1684    // ── write-through == rebuild (THE invariant) ─────────────────────────
1685
1686    #[test]
1687    fn on_write_matches_rebuild_byte_for_byte() {
1688        // Build a store incrementally via on_write, and a second identical store
1689        // via a single rebuild_all, then assert every index artifact is equal.
1690        let (_d1, wt) = mk_store();
1691        let (_d2, rb) = mk_store();
1692
1693        let docs: &[(&str, &str, &str, &str, &str)] = &[
1694            (
1695                "sources/emails/2026/05/e1.md",
1696                "email",
1697                "First mail",
1698                "2026-05-01T10:00:00Z",
1699                "tags:\n  - inbox\n",
1700            ),
1701            (
1702                "sources/emails/2026/06/e2.md",
1703                "email",
1704                "Second mail",
1705                "2026-06-01T10:00:00Z",
1706                "",
1707            ),
1708            (
1709                "records/contacts/sarah.md",
1710                "contact",
1711                "Sarah",
1712                "2026-05-15T10:00:00Z",
1713                "links:\n  - wiki/people/sarah\n",
1714            ),
1715            (
1716                "records/contacts/elena.md",
1717                "contact",
1718                "Elena",
1719                "2026-05-20T10:00:00Z",
1720                "status: active\n",
1721            ),
1722            (
1723                "wiki/people/sarah.md",
1724                "wiki-page",
1725                "Sarah bio",
1726                "2026-05-21T10:00:00Z",
1727                "",
1728            ),
1729        ];
1730
1731        for (rel, t, sum, upd, extra) in docs {
1732            write_doc(&wt, rel, t, Some(sum), Some(upd), extra);
1733            write_doc(&rb, rel, t, Some(sum), Some(upd), extra);
1734            Index::on_write(&wt, Path::new(rel)).unwrap();
1735        }
1736        Index::rebuild_all(&rb).unwrap();
1737
1738        let a = snapshot_artifacts(&wt);
1739        let b = snapshot_artifacts(&rb);
1740        assert_eq!(
1741            a.keys().collect::<Vec<_>>(),
1742            b.keys().collect::<Vec<_>>(),
1743            "same set of index artifacts must exist"
1744        );
1745        for (k, v) in &a {
1746            assert_eq!(v, &b[k], "artifact {k} differs between write-through and rebuild:\n--- write-through ---\n{v}\n--- rebuild ---\n{}", b[k]);
1747        }
1748        // Sanity: artifacts actually exist (not a vacuous comparison of empties).
1749        assert!(a.contains_key("index.md"));
1750        assert!(a.contains_key("sources/emails/index.jsonl"));
1751        assert!(a.contains_key("records/contacts/index.md"));
1752    }
1753
1754    /// Regression (O(changed) bound, not just correctness): a loop op must
1755    /// recompute its parent rollups from the type-folder `index.jsonl` sidecars
1756    /// — never by walking the content tree of *sibling* folders it wasn't asked
1757    /// about. The byte-identity property test (which always indexes every folder
1758    /// before comparing) can't catch a violation, because a full-store walk
1759    /// produces the *correct* counts too; it just does so in `O(store files)`.
1760    ///
1761    /// The behavioral fingerprint of the old `update_parents → build_layer /
1762    /// build_root` (which called `walk_type_folder_files` on every type-folder in
1763    /// the store): a single `on_write` to `records/contacts/sarah.md` would
1764    /// surface, in the layer + root rollups, the file count of
1765    /// `records/companies` — a sibling that has content on disk but was NEVER
1766    /// passed to a write/index op, so it has no `index.jsonl`. An O(changed) loop
1767    /// op cannot "see" that un-indexed folder; a whole-store walk can. So this
1768    /// asserts the rollups reflect ONLY the sidecar-indexed folder, proving no
1769    /// content-tree walk happened.
1770    #[test]
1771    fn loop_op_does_not_walk_sibling_content_tree() {
1772        let (_d, store) = mk_store();
1773
1774        // A sibling type-folder with real content on disk, but deliberately
1775        // never indexed (no on_write / write_level / rebuild over it) ⇒ no
1776        // `records/companies/index.jsonl` exists.
1777        write_doc(
1778            &store,
1779            "records/companies/acme.md",
1780            "company",
1781            Some("Acme Inc"),
1782            Some("2026-05-05T00:00:00Z"),
1783            "",
1784        );
1785        write_doc(
1786            &store,
1787            "records/companies/globex.md",
1788            "company",
1789            Some("Globex"),
1790            Some("2026-05-06T00:00:00Z"),
1791            "",
1792        );
1793        assert!(
1794            !exists(&store, "records/companies/index.jsonl"),
1795            "precondition: companies must be un-indexed"
1796        );
1797
1798        // The ONLY loop op: a single write to a different type-folder.
1799        write_doc(
1800            &store,
1801            "records/contacts/sarah.md",
1802            "contact",
1803            Some("Sarah"),
1804            Some("2026-05-15T00:00:00Z"),
1805            "",
1806        );
1807        Index::on_write(&store, Path::new("records/contacts/sarah.md")).unwrap();
1808
1809        // The written folder is reflected in both rollups...
1810        let layer_md = read(&store, "records/index.md");
1811        let root_md = read(&store, "index.md");
1812        // (layer rollup appends a summary preview, root does not)
1813        assert!(
1814            layer_md.contains("- [[records/contacts/index|Contacts]] (1) — Sarah\n"),
1815            "layer must reflect the written folder:\n{layer_md}"
1816        );
1817        assert!(
1818            root_md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
1819            "root must reflect the written folder:\n{root_md}"
1820        );
1821
1822        // ...but the un-indexed sibling must be INVISIBLE to a loop op. If the
1823        // rollups mention `records/companies` at all, `on_write` walked the whole
1824        // content tree — the O(store) regression.
1825        assert!(
1826            !layer_md.contains("companies"),
1827            "loop op walked the sibling content tree: layer rollup counts un-indexed records/companies\n{layer_md}"
1828        );
1829        assert!(
1830            !root_md.contains("companies"),
1831            "loop op walked the sibling content tree: root rollup counts un-indexed records/companies\n{root_md}"
1832        );
1833        // The layer's only child is contacts ⇒ its total is exactly 1, not 3.
1834        assert!(
1835            root_md.contains("## Records (1)"),
1836            "root layer total must count only the sidecar-indexed folder (1), not walked siblings (would be 3):\n{root_md}"
1837        );
1838
1839        // And the sidecar-derived count IS what a full walk WOULD yield once the
1840        // sibling is indexed too — i.e. the fix changes cost, not the eventual
1841        // result. Index companies, then confirm the rollups now (and only now)
1842        // include it, byte-identical to a from-scratch rebuild.
1843        let (_d2, rb) = mk_store();
1844        for (rel, t, s, u) in [
1845            (
1846                "records/companies/acme.md",
1847                "company",
1848                "Acme Inc",
1849                "2026-05-05T00:00:00Z",
1850            ),
1851            (
1852                "records/companies/globex.md",
1853                "company",
1854                "Globex",
1855                "2026-05-06T00:00:00Z",
1856            ),
1857            (
1858                "records/contacts/sarah.md",
1859                "contact",
1860                "Sarah",
1861                "2026-05-15T00:00:00Z",
1862            ),
1863        ] {
1864            write_doc(&rb, rel, t, Some(s), Some(u), "");
1865        }
1866        Index::on_write(&store, Path::new("records/companies/acme.md")).unwrap();
1867        Index::on_write(&store, Path::new("records/companies/globex.md")).unwrap();
1868        Index::rebuild_all(&rb).unwrap();
1869        let a = snapshot_artifacts(&store);
1870        let b = snapshot_artifacts(&rb);
1871        assert_eq!(
1872            a.keys().collect::<BTreeSet<_>>(),
1873            b.keys().collect::<BTreeSet<_>>(),
1874            "same artifact set after indexing both folders"
1875        );
1876        for (k, v) in &a {
1877            assert_eq!(
1878                v, &b[k],
1879                "after indexing the sibling too, loop result must equal rebuild for {k}"
1880            );
1881        }
1882        assert!(
1883            read(&store, "index.md").contains("## Records (3)"),
1884            "now that both folders are indexed, the root total is 3"
1885        );
1886    }
1887
1888    /// Regression: a wiki-page filed at the path the toolkit ITSELF computes
1889    /// (`Store::shard_path_for`) must be indexable end-to-end. The bug was that
1890    /// `shard_path_for("wiki-page", …)` returned a 2-component `wiki/<file>`
1891    /// path, which `type_folder_of` treats as having no type-folder. That made
1892    /// the producer (path computation) disagree with the consumer (index): the
1893    /// loop path crashed (`on_write` → `Err`, it tried to write `index.md`
1894    /// *inside* a file) while the sweep path silently dropped the page from
1895    /// every catalog. This test drives both paths through the real
1896    /// `shard_path_for` output and asserts (1) `on_write` succeeds, (2) the page
1897    /// appears in the rebuilt catalog, and (3) write-through == rebuild.
1898    #[test]
1899    fn wiki_page_at_shard_path_for_is_indexable_end_to_end() {
1900        let (_d1, wt) = mk_store();
1901        let (_d2, rb) = mk_store();
1902
1903        // The toolkit's own canonical write path for a wiki-page.
1904        let rel = wt
1905            .shard_path_for(
1906                "wiki-page",
1907                &crate::parser::Frontmatter::default(),
1908                "renewal-theme",
1909            )
1910            .unwrap();
1911        let rel_str = path_to_unix(&rel);
1912        // Guard the precondition the consumer requires: 3+ components so
1913        // `type_folder_of` resolves a real `<layer>/<type-folder>`.
1914        assert!(
1915            type_folder_of(&rel).is_some(),
1916            "shard_path_for produced a path the index cannot file: {rel_str}"
1917        );
1918
1919        write_doc(
1920            &wt,
1921            &rel_str,
1922            "wiki-page",
1923            Some("Renewal theme"),
1924            Some("2026-05-21T10:00:00Z"),
1925            "",
1926        );
1927        write_doc(
1928            &rb,
1929            &rel_str,
1930            "wiki-page",
1931            Some("Renewal theme"),
1932            Some("2026-05-21T10:00:00Z"),
1933            "",
1934        );
1935
1936        // (1) Loop path must NOT error (the old `wiki/<file>` shape returned
1937        // Err(Io(NotADirectory))).
1938        Index::on_write(&wt, &rel)
1939            .expect("on_write must succeed for a toolkit-computed wiki-page path");
1940        Index::rebuild_all(&rb).unwrap();
1941
1942        // (2) The page is present in the rebuilt catalog (the old flat-path bug
1943        // silently omitted it from every artifact). The individual page link
1944        // lives in the *type-folder* index; the *layer* index rolls the
1945        // type-folder up — assert both, since the bug erased both.
1946        let page_link = wiki_target(&rel); // wiki/topics/renewal-theme
1947        let tf_md = read(&rb, "wiki/topics/index.md");
1948        assert!(
1949            tf_md.contains(&format!("[[{page_link}]]")),
1950            "type-folder index must list the page link, got:\n{tf_md}"
1951        );
1952        assert!(
1953            exists(&rb, "wiki/topics/index.jsonl"),
1954            "type-folder jsonl must exist"
1955        );
1956        assert!(
1957            read(&rb, "wiki/topics/index.jsonl").contains(&rel_str),
1958            "type-folder jsonl must contain the page row"
1959        );
1960        // The layer index rolls the type-folder up (proves the page's folder is
1961        // visible to the layer catalog, not dropped).
1962        let layer_md = read(&rb, "wiki/index.md");
1963        assert!(
1964            layer_md.contains("wiki/topics/index"),
1965            "layer index must roll up the wiki/topics type-folder, got:\n{layer_md}"
1966        );
1967
1968        // (3) Write-through equals rebuild byte-for-byte — loop and sweep agree.
1969        let a = snapshot_artifacts(&wt);
1970        let b = snapshot_artifacts(&rb);
1971        assert_eq!(
1972            a.keys().collect::<Vec<_>>(),
1973            b.keys().collect::<Vec<_>>(),
1974            "loop and sweep must produce the same artifact set"
1975        );
1976        for (k, v) in &a {
1977            assert_eq!(
1978                v, &b[k],
1979                "wiki-page artifact {k} differs between on_write and rebuild"
1980            );
1981        }
1982    }
1983
1984    #[test]
1985    fn on_remove_then_rebuild_match_and_pull_in_next_over_cap() {
1986        let (_d1, wt) = mk_store();
1987        let (_d2, rb) = mk_store();
1988        let total = MD_CAP + 3; // 503 files; removing one keeps md full at 500
1989        let mut all_rels = Vec::new();
1990        for i in 0..total {
1991            let rel = format!("sources/emails/2026/05/m-{i:04}.md");
1992            // `updated` strictly increasing across i by varying both minute and second
1993            let updated = format!("2026-05-10T00:{:02}:{:02}Z", i / 60, i % 60);
1994            write_doc(
1995                &wt,
1996                &rel,
1997                "email",
1998                Some(&format!("mail {i}")),
1999                Some(&updated),
2000                "",
2001            );
2002            write_doc(
2003                &rb,
2004                &rel,
2005                "email",
2006                Some(&format!("mail {i}")),
2007                Some(&updated),
2008                "",
2009            );
2010            all_rels.push(rel);
2011        }
2012        // Build write-through index, then remove the single newest file.
2013        Index::rebuild_all(&wt).unwrap();
2014        let newest = &all_rels[total - 1]; // highest i = newest updated
2015        fs::remove_file(wt.root.join(newest)).unwrap();
2016        Index::on_remove(&wt, Path::new(newest)).unwrap();
2017
2018        // Rebuild side: same end state (file physically absent).
2019        fs::remove_file(rb.root.join(newest)).unwrap();
2020        Index::rebuild_all(&rb).unwrap();
2021
2022        let a = snapshot_artifacts(&wt);
2023        let b = snapshot_artifacts(&rb);
2024        for (k, v) in &a {
2025            assert_eq!(v, &b[k], "after remove, artifact {k} drifted from rebuild");
2026        }
2027
2028        // The md must still hold exactly 500 entries (the 501st got pulled in)
2029        // and the removed file must be gone from both artifacts.
2030        let md = read(&wt, "sources/emails/index.md");
2031        assert_eq!(md.lines().filter(|l| l.starts_with("- [[")).count(), MD_CAP);
2032        // Removed (newest) file is gone from the bare-path md and the .md jsonl.
2033        assert!(
2034            !md.contains(&format!("[[{}]]", wiki_target(Path::new(newest)))),
2035            "removed file must not be listed in md"
2036        );
2037        // The file previously at rank 501 (excluded under the cap) is `all_rels[2]`
2038        // — `updated` increases with index, so newest-first rank 500 = index 2.
2039        // After dropping the newest it shifts into the visible 500.
2040        let pulled_in = &all_rels[2];
2041        assert!(
2042            md.contains(&format!("[[{}]]", wiki_target(Path::new(pulled_in)))),
2043            "the 501st-most-recent must be pulled into the browse view after a removal"
2044        );
2045        assert!(
2046            md.contains(&format!("This folder has {} files.", total - 1)),
2047            "footer count must decrement:\n{}",
2048            md.lines().rev().take(4).collect::<Vec<_>>().join("\n")
2049        );
2050        let jsonl = read(&wt, "sources/emails/index.jsonl");
2051        assert_eq!(
2052            jsonl.lines().count(),
2053            total - 1,
2054            "jsonl loses exactly the removed file"
2055        );
2056        assert!(
2057            !jsonl.contains(&path_to_unix(Path::new(newest))),
2058            "removed file must be gone from the jsonl too"
2059        );
2060    }
2061
2062    #[test]
2063    fn on_rename_cross_folder_matches_rebuild() {
2064        let (_d1, wt) = mk_store();
2065        let (_d2, rb) = mk_store();
2066        // Seed both stores identically.
2067        let seed: &[(&str, &str, &str, &str)] = &[
2068            (
2069                "records/contacts/a.md",
2070                "contact",
2071                "A",
2072                "2026-05-01T00:00:00Z",
2073            ),
2074            (
2075                "records/contacts/b.md",
2076                "contact",
2077                "B",
2078                "2026-05-02T00:00:00Z",
2079            ),
2080            (
2081                "records/companies/x.md",
2082                "company",
2083                "X",
2084                "2026-05-03T00:00:00Z",
2085            ),
2086        ];
2087        for (rel, t, s, u) in seed {
2088            write_doc(&wt, rel, t, Some(s), Some(u), "");
2089            write_doc(&rb, rel, t, Some(s), Some(u), "");
2090        }
2091        Index::rebuild_all(&wt).unwrap();
2092
2093        // Rename contacts/b.md -> companies/b.md (cross type-folder). The file's
2094        // `type` changes to match its new folder, as a real `dbmd rename` would.
2095        let old = "records/contacts/b.md";
2096        let new = "records/companies/b.md";
2097        fs::create_dir_all(wt.root.join("records/companies")).unwrap();
2098        fs::rename(wt.root.join(old), wt.root.join(new)).unwrap();
2099        // (type stays "contact" here; index copies frontmatter verbatim — the
2100        // test only asserts placement + parity with rebuild.)
2101        Index::on_rename(&wt, Path::new(old), Path::new(new)).unwrap();
2102
2103        // Rebuild side: same end state.
2104        fs::create_dir_all(rb.root.join("records/companies")).unwrap();
2105        fs::rename(rb.root.join(old), rb.root.join(new)).unwrap();
2106        Index::rebuild_all(&rb).unwrap();
2107
2108        let a = snapshot_artifacts(&wt);
2109        let b = snapshot_artifacts(&rb);
2110        assert_eq!(a.keys().collect::<Vec<_>>(), b.keys().collect::<Vec<_>>());
2111        for (k, v) in &a {
2112            assert_eq!(v, &b[k], "rename: artifact {k} drifted from rebuild");
2113        }
2114        // Concretely: b is gone from contacts, present in companies.
2115        let contacts = read(&wt, "records/contacts/index.md");
2116        assert!(!contacts.contains("records/contacts/b]]"));
2117        let companies = read(&wt, "records/companies/index.md");
2118        assert!(companies.contains("[[records/companies/b]]"));
2119    }
2120
2121    #[test]
2122    fn on_write_updates_existing_entry_in_place() {
2123        let (_d, store) = mk_store();
2124        write_doc(
2125            &store,
2126            "records/contacts/a.md",
2127            "contact",
2128            Some("Original"),
2129            Some("2026-05-01T00:00:00Z"),
2130            "",
2131        );
2132        Index::on_write(&store, Path::new("records/contacts/a.md")).unwrap();
2133        // Edit the same file: new summary + newer updated.
2134        write_doc(
2135            &store,
2136            "records/contacts/a.md",
2137            "contact",
2138            Some("Revised"),
2139            Some("2026-05-09T00:00:00Z"),
2140            "",
2141        );
2142        Index::on_write(&store, Path::new("records/contacts/a.md")).unwrap();
2143
2144        let jsonl = read(&store, "records/contacts/index.jsonl");
2145        assert_eq!(
2146            jsonl.lines().count(),
2147            1,
2148            "upsert must not duplicate the line"
2149        );
2150        assert!(jsonl.contains("Revised"), "jsonl must reflect the update");
2151        assert!(
2152            !jsonl.contains("Original"),
2153            "stale line must be gone (compacted)"
2154        );
2155        let md = read(&store, "records/contacts/index.md");
2156        assert!(md.contains("- [[records/contacts/a]] — Revised\n"));
2157        assert!(
2158            md.contains("updated: 2026-05-09T00:00:00Z\n"),
2159            "index updated must track the newer member"
2160        );
2161    }
2162
2163    // ── dry-run + cleanup ────────────────────────────────────────────────
2164
2165    #[test]
2166    fn dry_run_emits_separators_and_writes_nothing() {
2167        let (_d, store) = mk_store();
2168        write_doc(
2169            &store,
2170            "sources/emails/2026/05/a.md",
2171            "email",
2172            Some("Mail"),
2173            Some("2026-05-01T00:00:00Z"),
2174            "",
2175        );
2176        let out = Index::render_dry_run(&store, &IndexLevel::TypeFolder("sources/emails".into()))
2177            .unwrap();
2178        assert!(
2179            out.contains("--- sources/emails/index.md ---\n"),
2180            "md separator:\n{out}"
2181        );
2182        assert!(
2183            out.contains("--- sources/emails/index.jsonl ---\n"),
2184            "jsonl separator:\n{out}"
2185        );
2186        assert!(
2187            out.contains("- [[sources/emails/2026/05/a]] — Mail"),
2188            "md body present"
2189        );
2190        // Nothing was written to disk.
2191        assert!(
2192            !exists(&store, "sources/emails/index.md"),
2193            "dry-run must not write"
2194        );
2195        assert!(
2196            !exists(&store, "sources/emails/index.jsonl"),
2197            "dry-run must not write"
2198        );
2199    }
2200
2201    #[test]
2202    fn cleanup_removes_noncanonical_and_empty_indexes() {
2203        let (_d, store) = mk_store();
2204        write_doc(
2205            &store,
2206            "sources/emails/2026/05/a.md",
2207            "email",
2208            Some("Mail"),
2209            Some("2026-05-01T00:00:00Z"),
2210            "",
2211        );
2212        // A stray index inside a date-shard (non-canonical) ...
2213        fs::write(
2214            store.root.join("sources/emails/2026/05/index.md"),
2215            "stale\n",
2216        )
2217        .unwrap();
2218        fs::write(
2219            store.root.join("sources/emails/2026/05/index.jsonl"),
2220            "stale\n",
2221        )
2222        .unwrap();
2223        // ... and an index in an empty type-folder.
2224        fs::create_dir_all(store.root.join("records/empty")).unwrap();
2225        fs::write(store.root.join("records/empty/index.md"), "stale\n").unwrap();
2226
2227        Index::cleanup(&store).unwrap();
2228
2229        assert!(
2230            !exists(&store, "sources/emails/2026/05/index.md"),
2231            "shard index must be deleted"
2232        );
2233        assert!(
2234            !exists(&store, "sources/emails/2026/05/index.jsonl"),
2235            "shard jsonl must be deleted"
2236        );
2237        assert!(
2238            !exists(&store, "records/empty/index.md"),
2239            "empty-folder index must be deleted"
2240        );
2241        // The canonical type-folder file itself is untouched by cleanup.
2242        assert!(exists(&store, "sources/emails/2026/05/a.md"));
2243    }
2244
2245    #[test]
2246    fn rebuild_deletes_stale_indexes_for_emptied_folders() {
2247        let (_d, store) = mk_store();
2248        write_doc(
2249            &store,
2250            "records/contacts/a.md",
2251            "contact",
2252            Some("A"),
2253            Some("2026-05-01T00:00:00Z"),
2254            "",
2255        );
2256        Index::rebuild_all(&store).unwrap();
2257        assert!(exists(&store, "records/contacts/index.md"));
2258        assert!(exists(&store, "records/index.md"));
2259        assert!(exists(&store, "index.md"));
2260
2261        // Empty the folder entirely, then rebuild: all three levels vanish.
2262        fs::remove_file(store.root.join("records/contacts/a.md")).unwrap();
2263        Index::rebuild_all(&store).unwrap();
2264        assert!(
2265            !exists(&store, "records/contacts/index.md"),
2266            "emptied type-folder index gone"
2267        );
2268        assert!(
2269            !exists(&store, "records/index.md"),
2270            "now-empty layer index gone"
2271        );
2272        assert!(!exists(&store, "index.md"), "now-empty root index gone");
2273    }
2274
2275    // ── randomized parity (property-style) ───────────────────────────────
2276
2277    #[test]
2278    fn property_writethrough_equals_rebuild_under_mixed_ops() {
2279        // Deterministic pseudo-random op sequence (no rand crate): a small LCG.
2280        let (_d1, wt) = mk_store();
2281        let (_d2, rb) = mk_store();
2282        let mut seed: u64 = 0x9E3779B97F4A7C15;
2283        let mut next = || {
2284            seed = seed
2285                .wrapping_mul(6364136223846793005)
2286                .wrapping_add(1442695040888963407);
2287            (seed >> 33) as u32
2288        };
2289
2290        let folders = ["sources/emails", "records/contacts", "wiki/people"];
2291        let types = ["email", "contact", "wiki-page"];
2292        let mut live: Vec<String> = Vec::new(); // store-relative paths that exist
2293
2294        for step in 0..120u32 {
2295            let r = next();
2296            let op = r % 10;
2297            if op < 6 || live.is_empty() {
2298                // CREATE/UPDATE
2299                let fi = (next() as usize) % folders.len();
2300                let folder = folders[fi];
2301                let id = next() % 40;
2302                let rel = if folder == "sources/emails" {
2303                    let month = 5 + (id % 2); // shard across two months
2304                    format!("{folder}/2026/{month:02}/f-{id:02}.md")
2305                } else {
2306                    format!("{folder}/f-{id:02}.md")
2307                };
2308                // recency varies with step so order is meaningful + total
2309                let updated = format!(
2310                    "2026-05-{:02}T{:02}:{:02}:00Z",
2311                    1 + (step % 27),
2312                    step % 24,
2313                    id % 60
2314                );
2315                let extra = if id % 3 == 0 {
2316                    "tags:\n  - x\n  - y\n"
2317                } else {
2318                    ""
2319                };
2320                write_doc(
2321                    &wt,
2322                    &rel,
2323                    types[fi],
2324                    Some(&format!("sum {step}")),
2325                    Some(&updated),
2326                    extra,
2327                );
2328                write_doc(
2329                    &rb,
2330                    &rel,
2331                    types[fi],
2332                    Some(&format!("sum {step}")),
2333                    Some(&updated),
2334                    extra,
2335                );
2336                Index::on_write(&wt, Path::new(&rel)).unwrap();
2337                if !live.contains(&rel) {
2338                    live.push(rel);
2339                }
2340            } else if op < 8 {
2341                // REMOVE a live file
2342                let idx = (next() as usize) % live.len();
2343                let rel = live.remove(idx);
2344                fs::remove_file(wt.root.join(&rel)).unwrap();
2345                fs::remove_file(rb.root.join(&rel)).ok();
2346                Index::on_remove(&wt, Path::new(&rel)).unwrap();
2347            } else {
2348                // RENAME a live file within the same layer (new id, maybe new type-folder)
2349                let idx = (next() as usize) % live.len();
2350                let old = live[idx].clone();
2351                // pick a destination folder in the same layer-ish set
2352                let fi = (next() as usize) % folders.len();
2353                let folder = folders[fi];
2354                let id = 50 + (next() % 40);
2355                let new = if folder == "sources/emails" {
2356                    format!("{folder}/2026/05/f-{id:02}.md")
2357                } else {
2358                    format!("{folder}/f-{id:02}.md")
2359                };
2360                if new == old || live.contains(&new) {
2361                    continue;
2362                }
2363                fs::create_dir_all(wt.root.join(&new).parent().unwrap()).unwrap();
2364                fs::create_dir_all(rb.root.join(&new).parent().unwrap()).unwrap();
2365                fs::rename(wt.root.join(&old), wt.root.join(&new)).unwrap();
2366                fs::rename(rb.root.join(&old), rb.root.join(&new)).unwrap();
2367                Index::on_rename(&wt, Path::new(&old), Path::new(&new)).unwrap();
2368                live[idx] = new;
2369            }
2370        }
2371
2372        // Now rebuild the rb side from the shared end state and compare.
2373        Index::rebuild_all(&rb).unwrap();
2374        let a = snapshot_artifacts(&wt);
2375        let b = snapshot_artifacts(&rb);
2376        assert_eq!(
2377            a.keys().collect::<BTreeSet<_>>(),
2378            b.keys().collect::<BTreeSet<_>>(),
2379            "write-through and rebuild must produce the same set of artifacts"
2380        );
2381        for (k, v) in &a {
2382            assert_eq!(
2383                v, &b[k],
2384                "INVARIANT VIOLATED: artifact {k} differs after mixed ops\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
2385                b[k]
2386            );
2387        }
2388        assert!(
2389            !a.is_empty(),
2390            "the run must have produced at least one artifact"
2391        );
2392    }
2393}