Skip to main content

dbmd_core/
index.rs

1//! `index` — the hierarchical content catalog.
2//!
3//! A uniform three-level tree: root + per-layer + per-type-folder. **Two
4//! artifacts per type-folder:** the human `index.md` (capped 500, recency
5//! browse) and the machine `index.jsonl` (complete, structured — one JSON
6//! object per file). Both read `summary` + key frontmatter fields + links
7//! directly from each file — there is no extraction logic here.
8//!
9//! **Maintained write-through** by the write commands ([`Index::on_write`] /
10//! [`Index::on_rename`] / [`Index::on_remove`] — the loop path, O(changed), no
11//! store walk); [`Index::rebuild_all`] is the from-scratch SWEEP repair.
12//!
13//! **Key invariant:** write-through must produce a byte-identical `index.md`
14//! and (post-compaction) `index.jsonl` to a full [`Index::rebuild_all`] over
15//! the same end state — the loop path can never drift from the repair path.
16//!
17//! # Implementation notes (deviations the reader should know)
18//!
19//! - **Self-contained, by design.** This module does its own shard-aware folder
20//!   walk, its own minimal frontmatter read, and its own atomic write, using
21//!   only `store.root` (a public field) and the `serde_norway` / `serde_json` /
22//!   `chrono` / `walkdir` crates rather than routing through the sibling
23//!   `store`/`parser` helpers ([`Store::walk_type_folder`],
24//!   [`Store::recent_in_type_folder`], [`parser::read_file`], …). The index has
25//!   to stamp a *deterministic* `updated:` and emit a *canonical, compacted*
26//!   `index.jsonl` (see the two notes below); keeping the read/walk/write local
27//!   is what makes the byte-identity invariant a true byte comparison, free of
28//!   any incidental formatting the shared readers might introduce. The public
29//!   signatures in `lib.rs` are untouched.
30//! - **Deterministic `updated:` on the index files themselves.** An index's own
31//!   `updated` frontmatter is derived as the max `updated` over the files it
32//!   catalogs (max over children for root/layer) — NOT wall-clock-now. This is
33//!   what makes the byte-identity invariant a *true* byte comparison: a
34//!   write-through write and a `rebuild_all` over the same end state stamp the
35//!   same value. (The SPEC's rendered examples show a wall-clock-looking value;
36//!   the conventions list only requires `updated: <RFC3339>`, and the
37//!   property-tested invariant dominates.)
38//! - **`index.jsonl` is always compacted.** Write-through rewrites the affected
39//!   type-folder's jsonl in canonical form (one current line per path, recency
40//!   order) rather than appending superseded/tombstone lines, so the jsonl is
41//!   byte-identical to `rebuild_all` *immediately* (a strictly stronger
42//!   guarantee than the SPEC's "post-compaction"). This keeps the loop cost at
43//!   one sidecar read + one rewrite per touched type-folder — O(folder), the
44//!   sanctioned loop primitive, never a whole-`Store::walk`.
45//! - **Root/layer entry styling** follows plan §index (`(N)` numeric counts;
46//!   layer headings in the root carry the layer's total count) which is more
47//!   specific than the SPEC's illustrative `(42 files)` prose example. Type
48//!   folders are listed alphabetically (a deterministic order a derived artifact
49//!   needs); `scope: type-folder` follows the conventions list, not the one
50//!   SPEC example that wrote `scope: folder`.
51
52use std::collections::{BTreeMap, BTreeSet};
53use std::fs;
54use std::io::Write as _;
55use std::path::{Path, PathBuf};
56
57use chrono::{DateTime, FixedOffset, SecondsFormat};
58use serde::{Deserialize, Serialize};
59use serde_json::Value;
60
61use crate::store::{Layer, Store};
62
/// Maximum number of entries listed in a type-folder `index.md` browse view.
const MD_CAP: usize = 500;

/// Stand-in summary for a content file whose frontmatter has no `summary`.
/// The index never makes up a real summary (that is `dbmd fm init`'s job);
/// `dbmd validate` keys off this marker to raise an `INDEX`-class issue.
const MISSING_SUMMARY: &str = "(no summary)";

/// H1 heading of the root `index.md`.
const ROOT_TITLE: &str = "Knowledge base index";
73
74/// Which level of the catalog an [`Index`] represents.
75#[derive(Debug, Clone, PartialEq, Eq)]
76pub enum IndexLevel {
77    /// The store-wide root `index.md` (layers + per-type counts).
78    Root,
79    /// A layer `index.md` (every type-folder under one layer).
80    Layer(Layer),
81    /// A type-folder `index.md` + `index.jsonl` (every file in the folder).
82    TypeFolder(PathBuf),
83}
84
85/// One record in a type-folder's `index.jsonl` — the complete, structured twin
86/// of a single `index.md` browse entry.
87///
88/// `tags` are the document's flat labels; `links` are its concept/relationship
89/// wiki-link targets. Both are copied verbatim from the file — never inferred.
90/// `fields` holds the remaining type-specific frontmatter so the structured
91/// query path can filter on any key without opening the file.
92#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
93pub struct IndexRecord {
94    /// Store-relative path of the file (the upsert key; last-write-wins).
95    /// Serialized with forward slashes regardless of OS (see [`path_serde`]) so
96    /// the `index.jsonl` catalog is byte-portable across platforms.
97    #[serde(with = "path_serde")]
98    pub path: PathBuf,
99    /// The file's `type`.
100    #[serde(rename = "type")]
101    pub type_: String,
102    /// The file's `summary`.
103    pub summary: String,
104    /// The file's flat `tags`.
105    #[serde(default)]
106    pub tags: Vec<String>,
107    /// The file's concept/relationship wiki-link targets (store-relative).
108    #[serde(default)]
109    pub links: Vec<String>,
110    /// `created` timestamp.
111    pub created: Option<DateTime<FixedOffset>>,
112    /// `updated` timestamp (the recency key for the `index.md` cap order).
113    pub updated: Option<DateTime<FixedOffset>>,
114    /// Remaining type-specific frontmatter fields, verbatim.
115    #[serde(flatten)]
116    pub fields: BTreeMap<String, Value>,
117}
118
119/// A built (or being-built) catalog for one [`IndexLevel`], with both rendered
120/// artifacts available. Pure data until written via [`Index::write_level`].
121#[derive(Debug, Clone, PartialEq)]
122pub struct Index {
123    /// Which level this catalog is for.
124    pub level: IndexLevel,
125    /// The complete record set for this level (type-folder level; empty for
126    /// root/layer rollups, which carry only counts).
127    pub records: Vec<IndexRecord>,
128    /// Per-child counts for root/layer rollups (child path → file count).
129    pub child_counts: BTreeMap<PathBuf, usize>,
130}
131
132impl Index {
133    /// Build a type-folder catalog by aggregating across date-shards, producing
134    /// both artifacts. `index.md` selection is recency (updated desc, ties by
135    /// path asc; cap 500 with a `## More` footer over the cap); `index.jsonl`
136    /// holds every file. A file missing `summary` gets a placeholder + a
137    /// validate-detectable issue (the index never invents summaries).
138    pub fn build_type_folder(store: &Store, type_folder: &Path) -> crate::Result<Index> {
139        let rel = normalize_rel(type_folder);
140        let abs = store.root.join(&rel);
141        let mut records = Vec::new();
142        for file_abs in walk_type_folder_files(&abs) {
143            let rel_path =
144                rel_to_store(&store.root, &file_abs).expect("walked file is under the store root");
145            records.push(record_from_file(&file_abs, rel_path)?);
146        }
147        sort_records(&mut records);
148        Ok(Index {
149            level: IndexLevel::TypeFolder(rel),
150            records,
151            child_counts: BTreeMap::new(),
152        })
153    }
154
155    /// Build a layer catalog: every non-empty type-folder under the layer with
156    /// `(N)` counts and a newest-file `summary` preview (≤ 80 chars).
157    pub fn build_layer(store: &Store, layer: Layer) -> crate::Result<Index> {
158        let mut child_counts = BTreeMap::new();
159        for tf in type_folders_in_layer(store, layer) {
160            let abs = store.root.join(&tf);
161            let n = walk_type_folder_files(&abs).len();
162            if n > 0 {
163                child_counts.insert(tf, n);
164            }
165        }
166        Ok(Index {
167            level: IndexLevel::Layer(layer),
168            records: Vec::new(),
169            child_counts,
170        })
171    }
172
173    /// Build the store-wide root catalog: one heading per non-empty layer with
174    /// total count + bulleted per-type sub-entries with `(N)` counts.
175    pub fn build_root(store: &Store) -> crate::Result<Index> {
176        let mut child_counts = BTreeMap::new();
177        for layer in Layer::all() {
178            for tf in type_folders_in_layer(store, layer) {
179                let abs = store.root.join(&tf);
180                let n = walk_type_folder_files(&abs).len();
181                if n > 0 {
182                    child_counts.insert(tf, n);
183                }
184            }
185        }
186        Ok(Index {
187            level: IndexLevel::Root,
188            records: Vec::new(),
189            child_counts,
190        })
191    }
192
193    /// Render this catalog as a canonical `index.md`.
194    pub fn to_markdown(&self) -> String {
195        match &self.level {
196            IndexLevel::TypeFolder(folder) => self.render_type_folder_md(folder),
197            IndexLevel::Layer(layer) => self.render_layer_md(*layer),
198            IndexLevel::Root => self.render_root_md(),
199        }
200    }
201
202    /// Render this type-folder catalog as the complete `index.jsonl` (one JSON
203    /// object per file, stable key order so diffs stay minimal). Type-folder
204    /// level only — root and layer stay markdown rollups.
205    pub fn to_jsonl(&self) -> String {
206        let mut out = String::new();
207        for rec in &self.records {
208            // The record type derives a deterministic, sorted key order
209            // (declared fields first, then the flattened `fields` BTreeMap).
210            let line = serde_json::to_string(rec).expect("IndexRecord serializes");
211            out.push_str(&line);
212            out.push('\n');
213        }
214        out
215    }
216
217    // ── rendering helpers ────────────────────────────────────────────────
218
219    fn render_type_folder_md(&self, folder: &Path) -> String {
220        let folder_disp = path_to_unix(folder);
221        let updated = max_updated(self.records.iter().map(|r| r.updated.as_ref()));
222        let mut s = String::new();
223        s.push_str("---\n");
224        s.push_str("type: index\n");
225        s.push_str("scope: type-folder\n");
226        s.push_str(&format!("folder: {folder_disp}\n"));
227        if let Some(ts) = updated {
228            s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
229        }
230        s.push_str("---\n\n");
231        s.push_str(&format!("# {folder_disp}\n\n"));
232
233        let shown = self.records.len().min(MD_CAP);
234        for rec in self.records.iter().take(shown) {
235            s.push_str(&format_md_entry(rec));
236            s.push('\n');
237        }
238
239        if self.records.len() > MD_CAP {
240            let type_ = self.records.first().map(|r| r.type_.as_str()).unwrap_or("");
241            let layer = folder
242                .components()
243                .next()
244                .and_then(|c| c.as_os_str().to_str())
245                .unwrap_or("");
246            s.push('\n');
247            s.push_str(&more_footer(self.records.len(), type_, layer));
248        }
249        s
250    }
251
252    /// Store-less layer rollup: counts only, no preview / no derived `updated`
253    /// (a layer index needs each child's on-disk jsonl for those — see
254    /// [`render_layer_md_with_store`], the canonical path every disk write
255    /// uses). This pure-data render is structurally identical sans preview.
256    fn render_layer_md(&self, layer: Layer) -> String {
257        let layer_dir = layer_dir_name(layer);
258        let mut s = String::new();
259        s.push_str("---\n");
260        s.push_str("type: index\n");
261        s.push_str("scope: layer\n");
262        s.push_str(&format!("folder: {layer_dir}\n"));
263        s.push_str("---\n\n");
264        s.push_str(&format!("# {layer_dir}\n\n"));
265        for (tf, n) in &self.child_counts {
266            let tf_unix = path_to_unix(tf);
267            let display = capitalize(folder_basename(tf));
268            s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
269        }
270        s
271    }
272
273    /// Store-less root rollup: counts only (the canonical disk render adds a
274    /// derived `updated` — see [`render_root_md_with_store`]).
275    fn render_root_md(&self) -> String {
276        let mut s = String::new();
277        s.push_str("---\n");
278        s.push_str("type: index\n");
279        s.push_str("scope: root\n");
280        s.push_str("---\n\n");
281        s.push_str(&format!("# {ROOT_TITLE}\n"));
282        for layer in Layer::all() {
283            let layer_dir = layer_dir_name(layer);
284            let prefix = format!("{layer_dir}/");
285            let children: Vec<(&PathBuf, &usize)> = self
286                .child_counts
287                .iter()
288                .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
289                .collect();
290            if children.is_empty() {
291                continue;
292            }
293            let total: usize = children.iter().map(|(_, n)| **n).sum();
294            s.push('\n');
295            s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
296            for (tf, n) in children {
297                let tf_unix = path_to_unix(tf);
298                let display = capitalize(folder_basename(tf));
299                s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
300            }
301        }
302        s
303    }
304}
305
306// ─────────────────────────────────────────────────────────────────────────
// Write-through + sweep (associated functions on `Index`, in a second impl block).
308// ─────────────────────────────────────────────────────────────────────────
309
310impl Index {
311    /// **Write-through (loop, O(changed)).** Upsert a new/updated content file.
312    /// Reads the affected type-folder's `index.jsonl` (the sanctioned per-folder
313    /// sidecar read — never a whole-store walk), applies the change, and
314    /// atomically rewrites that folder's `index.md` + `index.jsonl` plus the
315    /// parent layer + root rollups so the artifacts equal a `rebuild_all` over
316    /// the same end state.
317    pub fn on_write(store: &Store, file: &Path) -> crate::Result<()> {
318        let file_rel = normalize_rel(file);
319        let file_abs = store.root.join(&file_rel);
320        let folder = type_folder_of(&file_rel)
321            .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
322        let record = record_from_file(&file_abs, file_rel.clone())?;
323
324        let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
325        records.retain(|r| r.path != record.path);
326        records.push(record);
327        sort_records(&mut records);
328
329        write_type_folder_artifacts(store, &folder, &records)?;
330        update_parents(store, &folder)?;
331        Ok(())
332    }
333
334    /// **Write-through (loop, O(changed)).** Move a file's entry between
335    /// type-folder indexes (or within, if the same folder) in both `index.md`
336    /// and `index.jsonl`, fixing counts on both sides.
337    pub fn on_rename(store: &Store, old: &Path, new: &Path) -> crate::Result<()> {
338        let old_rel = normalize_rel(old);
339        let new_rel = normalize_rel(new);
340        let old_folder = type_folder_of(&old_rel)
341            .ok_or_else(|| bad_index(&old_rel, "source is not inside a layer/type-folder"))?;
342        let new_folder = type_folder_of(&new_rel)
343            .ok_or_else(|| bad_index(&new_rel, "target is not inside a layer/type-folder"))?;
344
345        // Drop from the old folder.
346        let mut old_records =
347            read_jsonl_records(&store.root.join(&old_folder).join("index.jsonl"))?;
348        old_records.retain(|r| r.path != old_rel);
349
350        if old_folder == new_folder {
351            // Same folder: re-read the (now-renamed) file and upsert.
352            let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
353            old_records.retain(|r| r.path != record.path);
354            old_records.push(record);
355            sort_records(&mut old_records);
356            write_type_folder_artifacts(store, &old_folder, &old_records)?;
357            update_parents(store, &old_folder)?;
358            return Ok(());
359        }
360
361        // Cross-folder: write the trimmed old folder (or drop its indexes if
362        // now empty), then upsert into the new folder.
363        sort_records(&mut old_records);
364        write_type_folder_artifacts(store, &old_folder, &old_records)?;
365
366        let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
367        let mut new_records =
368            read_jsonl_records(&store.root.join(&new_folder).join("index.jsonl"))?;
369        new_records.retain(|r| r.path != record.path);
370        new_records.push(record);
371        sort_records(&mut new_records);
372        write_type_folder_artifacts(store, &new_folder, &new_records)?;
373
374        update_parents(store, &old_folder)?;
375        update_parents(store, &new_folder)?;
376        Ok(())
377    }
378
379    /// **Write-through (loop, O(changed)).** Drop a file's entry from both
380    /// `index.md` and `index.jsonl`; decrement counts; if the browse view drops
381    /// below the cap, the next-most-recent is already present in the complete
382    /// jsonl record set and re-renders into the md automatically.
383    pub fn on_remove(store: &Store, file: &Path) -> crate::Result<()> {
384        let file_rel = normalize_rel(file);
385        let folder = type_folder_of(&file_rel)
386            .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
387        let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
388        let before = records.len();
389        records.retain(|r| r.path != file_rel);
390        if records.len() == before {
391            // Nothing to remove; still normalize the folder + parents so the
392            // artifacts stay canonical.
393        }
394        sort_records(&mut records);
395        write_type_folder_artifacts(store, &folder, &records)?;
396        update_parents(store, &folder)?;
397        Ok(())
398    }
399
400    /// **SWEEP repair.** Walk the store once and atomically (re)write root +
401    /// every non-empty layer + every non-empty type-folder `index.md` and
402    /// `index.jsonl` (compacting the jsonl). Also runs [`Index::cleanup`].
403    pub fn rebuild_all(store: &Store) -> crate::Result<()> {
404        Index::cleanup(store)?;
405        for layer in Layer::all() {
406            for tf in type_folders_in_layer(store, layer) {
407                let idx = Index::build_type_folder(store, &tf)?;
408                if idx.records.is_empty() {
409                    continue;
410                }
411                write_type_folder_artifacts(store, &tf, &idx.records)?;
412            }
413            let layer_idx = Index::build_layer(store, layer)?;
414            let layer_index_md = store.root.join(layer_dir_name(layer)).join("index.md");
415            if layer_idx.child_counts.is_empty() {
416                remove_if_exists(&layer_index_md)?;
417            } else {
418                write_atomic(
419                    &layer_index_md,
420                    render_layer_md_with_store(store, &layer_idx),
421                )?;
422            }
423        }
424        let root_idx = Index::build_root(store)?;
425        let root_index_md = store.root.join("index.md");
426        if root_idx.child_counts.is_empty() {
427            remove_if_exists(&root_index_md)?;
428        } else {
429            write_atomic(&root_index_md, render_root_md_with_store(store, &root_idx))?;
430        }
431        Ok(())
432    }
433
434    /// Rebuild ONE type-folder's `index.md`/`index.jsonl` from a fresh walk, then
435    /// cascade the new child count up to the layer and root rollups — so a
436    /// scoped `dbmd index rebuild --folder` leaves the hierarchy consistent,
437    /// exactly like `rebuild_all` and the loop-path `on_write` already do.
438    /// (Writing only the folder, as the CLI used to, left stale layer/root
439    /// counts that `validate` would then flag as an index desync.)
440    pub fn rebuild_folder(store: &Store, folder: &Path) -> crate::Result<()> {
441        Self::write_level(store, &IndexLevel::TypeFolder(folder.to_path_buf()))?;
442        update_parents(store, folder)
443    }
444
445    /// Atomically write a single level's artifact(s) to disk.
446    pub fn write_level(store: &Store, level: &IndexLevel) -> crate::Result<()> {
447        match level {
448            IndexLevel::TypeFolder(folder) => {
449                let idx = Index::build_type_folder(store, folder)?;
450                if idx.records.is_empty() {
451                    remove_if_exists(&store.root.join(folder).join("index.md"))?;
452                    remove_if_exists(&store.root.join(folder).join("index.jsonl"))?;
453                } else {
454                    write_type_folder_artifacts(store, folder, &idx.records)?;
455                }
456            }
457            IndexLevel::Layer(layer) => {
458                let idx = Index::build_layer(store, *layer)?;
459                let p = store.root.join(layer_dir_name(*layer)).join("index.md");
460                if idx.child_counts.is_empty() {
461                    remove_if_exists(&p)?;
462                } else {
463                    write_atomic(&p, render_layer_md_with_store(store, &idx))?;
464                }
465            }
466            IndexLevel::Root => {
467                let idx = Index::build_root(store)?;
468                let p = store.root.join("index.md");
469                if idx.child_counts.is_empty() {
470                    remove_if_exists(&p)?;
471                } else {
472                    write_atomic(&p, render_root_md_with_store(store, &idx))?;
473                }
474            }
475        }
476        Ok(())
477    }
478
479    /// Render the generated indexes to a string with `--- <path> ---`
480    /// separators instead of writing them (`--dry-run`).
481    pub fn render_dry_run(store: &Store, level: &IndexLevel) -> crate::Result<String> {
482        let mut out = String::new();
483        match level {
484            IndexLevel::TypeFolder(folder) => {
485                let idx = Index::build_type_folder(store, folder)?;
486                let md_path = path_to_unix(&folder.join("index.md"));
487                let jsonl_path = path_to_unix(&folder.join("index.jsonl"));
488                out.push_str(&format!("--- {md_path} ---\n"));
489                out.push_str(&idx.to_markdown());
490                out.push_str(&format!("--- {jsonl_path} ---\n"));
491                out.push_str(&idx.to_jsonl());
492            }
493            IndexLevel::Layer(layer) => {
494                let idx = Index::build_layer(store, *layer)?;
495                let md_path = format!("{}/index.md", layer_dir_name(*layer));
496                out.push_str(&format!("--- {md_path} ---\n"));
497                out.push_str(&render_layer_md_with_store(store, &idx));
498            }
499            IndexLevel::Root => {
500                let idx = Index::build_root(store)?;
501                out.push_str("--- index.md ---\n");
502                out.push_str(&render_root_md_with_store(store, &idx));
503            }
504        }
505        Ok(out)
506    }
507
508    /// Cleanup pass (part of [`Index::rebuild_all`]): delete `index.md` /
509    /// `index.jsonl` in non-canonical folders (empty folders, or date-shards
510    /// that should carry none). Symmetric with index creation.
511    pub fn cleanup(store: &Store) -> crate::Result<()> {
512        for layer in Layer::all() {
513            let layer_dir = store.root.join(layer_dir_name(layer));
514            if !layer_dir.is_dir() {
515                continue;
516            }
517            for tf in type_folders_in_layer(store, layer) {
518                let tf_abs = store.root.join(&tf);
519                // Any index inside a shard (below the type-folder root) is
520                // non-canonical: delete it.
521                for entry in walkdir::WalkDir::new(&tf_abs)
522                    .min_depth(1)
523                    .into_iter()
524                    .filter_map(|e| e.ok())
525                {
526                    let p = entry.path();
527                    if is_index_artifact(p) {
528                        remove_if_exists(p)?;
529                    }
530                }
531                // Empty type-folder → no index at its root either.
532                if walk_type_folder_files(&tf_abs).is_empty() {
533                    remove_if_exists(&tf_abs.join("index.md"))?;
534                    remove_if_exists(&tf_abs.join("index.jsonl"))?;
535                }
536            }
537        }
538        Ok(())
539    }
540}
541
542// ─────────────────────────────────────────────────────────────────────────
543// Private free helpers — all self-contained, none call back into Store/parser.
544// ─────────────────────────────────────────────────────────────────────────
545
546/// Write both artifacts for a type-folder, or delete them if the folder is now
547/// empty. The single funnel both write-through and rebuild go through, so their
548/// output is byte-identical by construction.
549fn write_type_folder_artifacts(
550    store: &Store,
551    folder: &Path,
552    records: &[IndexRecord],
553) -> crate::Result<()> {
554    let folder_abs = store.root.join(folder);
555    let md_path = folder_abs.join("index.md");
556    let jsonl_path = folder_abs.join("index.jsonl");
557    if records.is_empty() {
558        remove_if_exists(&md_path)?;
559        remove_if_exists(&jsonl_path)?;
560        return Ok(());
561    }
562    let idx = Index {
563        level: IndexLevel::TypeFolder(folder.to_path_buf()),
564        records: records.to_vec(),
565        child_counts: BTreeMap::new(),
566    };
567    write_atomic(&md_path, idx.to_markdown())?;
568    write_atomic(&jsonl_path, idx.to_jsonl())?;
569    Ok(())
570}
571
572/// Re-render the layer + root rollups that sit above `folder` — the
573/// **loop path**, O(changed). Counts come from the type-folders' on-disk
574/// `index.jsonl` sidecars ([`child_counts_from_jsonl`]), NOT from a content-tree
575/// walk: a single write touches only the affected layer's sidecars (for the
576/// layer rollup) and one sidecar per type-folder (for the root rollup) — never
577/// the millions of files under the shards. `build_layer` / `build_root` (which
578/// *do* walk the content tree) are reserved for the from-scratch sweeps
579/// ([`Index::rebuild_all`], [`Index::write_level`], [`Index::render_dry_run`]).
580/// The result is byte-identical to those builders because in the loop — exactly
581/// as in `rebuild_all` — every touched folder's jsonl is rewritten before its
582/// parents are rolled up, so `jsonl_record_count == walk_type_folder_files.len()`
583/// for every folder read here.
584fn update_parents(store: &Store, folder: &Path) -> crate::Result<()> {
585    let layer = folder
586        .components()
587        .next()
588        .and_then(|c| c.as_os_str().to_str())
589        .and_then(layer_from_dir_name);
590    if let Some(layer) = layer {
591        let idx = Index {
592            level: IndexLevel::Layer(layer),
593            records: Vec::new(),
594            child_counts: child_counts_from_jsonl(store, &[layer])?,
595        };
596        let p = store.root.join(layer_dir_name(layer)).join("index.md");
597        if idx.child_counts.is_empty() {
598            remove_if_exists(&p)?;
599        } else {
600            write_atomic(&p, render_layer_md_with_store(store, &idx))?;
601        }
602    }
603    let root = Index {
604        level: IndexLevel::Root,
605        records: Vec::new(),
606        child_counts: child_counts_from_jsonl(store, &Layer::all())?,
607    };
608    let rp = store.root.join("index.md");
609    if root.child_counts.is_empty() {
610        remove_if_exists(&rp)?;
611    } else {
612        write_atomic(&rp, render_root_md_with_store(store, &root))?;
613    }
614    Ok(())
615}
616
617/// Render a layer `index.md`, reading each child's newest summary + max-updated
618/// straight from its on-disk `index.jsonl` (so the rollup matches the folder
619/// artifacts exactly, write-through and rebuild alike).
620fn render_layer_md_with_store(store: &Store, idx: &Index) -> String {
621    let layer = match idx.level {
622        IndexLevel::Layer(l) => l,
623        _ => unreachable!("render_layer_md_with_store called on non-layer"),
624    };
625    let layer_dir = layer_dir_name(layer);
626    let mut max_upd: Option<DateTime<FixedOffset>> = None;
627    let mut entries = String::new();
628    for (tf, n) in &idx.child_counts {
629        let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
630        let newest = recs.first();
631        if let Some(u) = newest.and_then(|r| r.updated) {
632            max_upd = Some(match max_upd {
633                Some(cur) if cur >= u => cur,
634                _ => u,
635            });
636        }
637        let tf_unix = path_to_unix(tf);
638        let display = capitalize(folder_basename(tf));
639        let preview = newest
640            .map(|r| truncate(&r.summary, 80))
641            .filter(|p| !p.is_empty() && p != MISSING_SUMMARY);
642        match preview {
643            Some(p) => entries.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n}) — {p}\n")),
644            None => entries.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n")),
645        }
646    }
647    let mut s = String::new();
648    s.push_str("---\n");
649    s.push_str("type: index\n");
650    s.push_str("scope: layer\n");
651    s.push_str(&format!("folder: {layer_dir}\n"));
652    if let Some(ts) = max_upd {
653        s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
654    }
655    s.push_str("---\n\n");
656    s.push_str(&format!("# {layer_dir}\n\n"));
657    s.push_str(&entries);
658    s
659}
660
661/// Render the root `index.md`, taking each child's max-updated from its on-disk
662/// `index.jsonl`.
663fn render_root_md_with_store(store: &Store, idx: &Index) -> String {
664    let mut max_upd: Option<DateTime<FixedOffset>> = None;
665    for tf in idx.child_counts.keys() {
666        let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
667        if let Some(u) = recs.first().and_then(|r| r.updated) {
668            max_upd = Some(match max_upd {
669                Some(cur) if cur >= u => cur,
670                _ => u,
671            });
672        }
673    }
674    let mut s = String::new();
675    s.push_str("---\n");
676    s.push_str("type: index\n");
677    s.push_str("scope: root\n");
678    if let Some(ts) = max_upd {
679        s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
680    }
681    s.push_str("---\n\n");
682    s.push_str(&format!("# {ROOT_TITLE}\n"));
683    for layer in Layer::all() {
684        let layer_dir = layer_dir_name(layer);
685        let prefix = format!("{layer_dir}/");
686        let children: Vec<(&PathBuf, &usize)> = idx
687            .child_counts
688            .iter()
689            .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
690            .collect();
691        if children.is_empty() {
692            continue;
693        }
694        let total: usize = children.iter().map(|(_, n)| **n).sum();
695        s.push('\n');
696        s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
697        for (tf, n) in children {
698            let tf_unix = path_to_unix(tf);
699            let display = capitalize(folder_basename(tf));
700            s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
701        }
702    }
703    s
704}
705
706/// One `index.md` browse line: `- [[path]] — summary  ·  #tag #tag` (the
707/// `  ·  #…` suffix omitted when the file has no tags). The wiki-link target is
708/// the canonical **bare** store-relative path (no `.md` extension — the
709/// doctrine the writers emit and `validate` enforces via
710/// `WIKI_LINK_HAS_EXTENSION`); the jsonl `path` keeps the real on-disk name.
711fn format_md_entry(rec: &IndexRecord) -> String {
712    let path = wiki_target(&rec.path);
713    let mut line = format!("- [[{path}]] — {}", rec.summary);
714    if !rec.tags.is_empty() {
715        let tags = rec
716            .tags
717            .iter()
718            .map(|t| format!("#{t}"))
719            .collect::<Vec<_>>()
720            .join(" ");
721        line.push_str(&format!("  ·  {tags}"));
722    }
723    line
724}
725
726/// The deterministic `## More` footer for an over-cap type-folder.
727fn more_footer(total: usize, type_: &str, layer: &str) -> String {
728    format!(
729        "## More\n\nThis folder has {total} files. The {MD_CAP} most recent are listed above.\nUse `dbmd index query --type {type_} --in {layer}` for the complete catalog.\n"
730    )
731}
732
733/// Canonical total order: `updated` descending (None sorts last), ties broken
734/// by store-relative path ascending. A *total* order, so write-through and
735/// rebuild never disagree on #500 vs #501.
736fn sort_records(records: &mut [IndexRecord]) {
737    records.sort_by(|a, b| {
738        match (b.updated, a.updated) {
739            (Some(bu), Some(au)) => bu.cmp(&au),
740            (Some(_), None) => std::cmp::Ordering::Greater, // a is None → after b
741            (None, Some(_)) => std::cmp::Ordering::Less,    // b is None → after a
742            (None, None) => std::cmp::Ordering::Equal,
743        }
744        .then_with(|| a.path.cmp(&b.path))
745    });
746}
747
748impl IndexRecord {
749    /// Build the [`IndexRecord`] a freshly-rebuilt `index.jsonl` *should* hold
750    /// for the file at `abs` (catalogued under store-relative `rel`).
751    ///
752    /// This is the single canonical projection from frontmatter → sidecar
753    /// record: [`Index::build_type_folder`] uses the same path to write the
754    /// jsonl, so the validator can rebuild the expected record here and compare
755    /// it field-for-field against the committed line — covering **every**
756    /// queryable/dedup field the query path reads (`summary`, `type`, `tags`,
757    /// `links`, `created`, `updated`, and every type-specific `fields` entry
758    /// like `email` / `domain` / `company` / `amount` / `vendor`) without the
759    /// validator hand-rolling (and drifting from) the projection per field.
760    pub(crate) fn expected_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
761        record_from_file(abs, rel)
762    }
763}
764
765/// Build an [`IndexRecord`] from a file on disk. Missing `summary` →
766/// [`MISSING_SUMMARY`] placeholder (the index never invents a summary).
767fn record_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
768    let meta = read_frontmatter(abs)?;
769    Ok(IndexRecord {
770        path: rel,
771        type_: meta.type_.unwrap_or_default(),
772        summary: meta.summary.unwrap_or_else(|| MISSING_SUMMARY.to_string()),
773        tags: meta.tags,
774        links: meta.links,
775        created: meta.created,
776        updated: meta.updated,
777        fields: meta.fields,
778    })
779}
780
/// The slice of a frontmatter this module needs.
///
/// Produced by [`read_frontmatter`] and consumed by [`record_from_file`].
struct FileMeta {
    /// The `type` key, when present as a string scalar.
    type_: Option<String>,
    /// The `summary` key, when present as a string scalar.
    summary: Option<String>,
    /// The `tags` key as a list of strings (empty when absent).
    tags: Vec<String>,
    /// The `links` key as a list of strings (empty when absent).
    links: Vec<String>,
    /// The `created` key, when it is a string that parses as RFC3339.
    created: Option<DateTime<FixedOffset>>,
    /// The `updated` key, when it is a string that parses as RFC3339.
    updated: Option<DateTime<FixedOffset>>,
    /// Every other string-keyed entry (except `path`), converted to JSON.
    fields: BTreeMap<String, Value>,
}
791
792/// Minimal frontmatter read: split the leading `---`…`---` block and parse it
793/// as YAML, extracting the typed fields and spilling the rest into `fields`.
794/// Self-contained (does not route through the `parser` module).
795fn read_frontmatter(abs: &Path) -> crate::Result<FileMeta> {
796    let text = fs::read_to_string(abs)?;
797    let yaml = extract_frontmatter_block(&text).unwrap_or_default();
798    let map: serde_norway::Mapping = if yaml.trim().is_empty() {
799        serde_norway::Mapping::new()
800    } else {
801        serde_norway::from_str(&yaml).map_err(|e| {
802            crate::Error::Store(crate::store::StoreError::BadTypeIndex {
803                path: abs.to_path_buf(),
804                message: format!("frontmatter YAML: {e}"),
805            })
806        })?
807    };
808
809    let mut type_ = None;
810    let mut summary = None;
811    let mut tags = Vec::new();
812    let mut links = Vec::new();
813    let mut created = None;
814    let mut updated = None;
815    let mut fields = BTreeMap::new();
816
817    for (k, v) in map {
818        let key = match k.as_str() {
819            Some(s) => s.to_string(),
820            None => continue,
821        };
822        match key.as_str() {
823            "type" => type_ = v.as_str().map(str::to_string),
824            "summary" => summary = v.as_str().map(str::to_string),
825            "tags" => tags = yaml_string_list(&v),
826            "links" => links = yaml_string_list(&v),
827            "created" => created = v.as_str().and_then(parse_ts),
828            "updated" => updated = v.as_str().and_then(parse_ts),
829            // `path`, `type`, `summary`, `tags`, `links`, `created`, `updated`
830            // are the reserved IndexRecord keys; everything else (including
831            // `id`, `status`, type-specific fields) goes to `fields`.
832            "path" => {}
833            _ => {
834                fields.insert(key, yaml_to_json_value(&v));
835            }
836        }
837    }
838
839    Ok(FileMeta {
840        type_,
841        summary,
842        tags,
843        links,
844        created,
845        updated,
846        fields,
847    })
848}
849
/// Return the text between a leading `---` fence and the next `---` fence.
///
/// A single leading byte-order mark is ignored. A fence is a line equal to
/// `---` once trailing whitespace is removed. Each captured line is emitted
/// with a `\n` terminator, whatever line ending the input used.
///
/// Returns `None` when the first line is not a fence or when no closing fence
/// follows; returns `Some("")` when the two fences are adjacent.
fn extract_frontmatter_block(text: &str) -> Option<String> {
    let body = text.strip_prefix('\u{feff}').unwrap_or(text);
    let mut rest = body.lines();
    if rest.next()?.trim_end() != "---" {
        return None;
    }

    let mut yaml = String::new();
    loop {
        // Running out of lines means the block was never closed.
        let line = rest.next()?;
        if line.trim_end() == "---" {
            return Some(yaml);
        }
        yaml.push_str(line);
        yaml.push('\n');
    }
}
869
870/// Read a string scalar or a sequence-of-string-scalars into a `Vec<String>`.
871/// Wiki-link items keep their `[[…]]` form verbatim.
872fn yaml_string_list(v: &serde_norway::Value) -> Vec<String> {
873    match v {
874        serde_norway::Value::String(s) => vec![s.clone()],
875        serde_norway::Value::Sequence(seq) => seq
876            .iter()
877            .filter_map(yaml_string_or_wiki_link_literal)
878            .collect(),
879        _ => Vec::new(),
880    }
881}
882
883fn yaml_string_or_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
884    v.as_str()
885        .map(str::to_string)
886        .or_else(|| unquoted_wiki_link_literal(v))
887}
888
889fn yaml_to_json_value(v: &serde_norway::Value) -> Value {
890    if let Some(link) = unquoted_wiki_link_literal(v) {
891        return Value::String(link);
892    }
893    match v {
894        serde_norway::Value::String(s) => Value::String(s.clone()),
895        serde_norway::Value::Bool(b) => Value::Bool(*b),
896        serde_norway::Value::Number(n) => {
897            serde_json::to_value(n).unwrap_or_else(|_| Value::String(n.to_string()))
898        }
899        serde_norway::Value::Sequence(seq) => {
900            Value::Array(seq.iter().map(yaml_to_json_value).collect())
901        }
902        serde_norway::Value::Mapping(_) | serde_norway::Value::Tagged(_) => {
903            serde_json::to_value(v).unwrap_or(Value::Null)
904        }
905        serde_norway::Value::Null => Value::Null,
906    }
907}
908
909fn unquoted_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
910    let serde_norway::Value::Sequence(outer) = v else {
911        return None;
912    };
913    if outer.len() != 1 {
914        return None;
915    }
916    let serde_norway::Value::Sequence(inner) = &outer[0] else {
917        return None;
918    };
919    let [serde_norway::Value::String(target)] = inner.as_slice() else {
920        return None;
921    };
922    Some(format!("[[{target}]]"))
923}
924
925/// Parse an RFC3339 timestamp scalar.
926fn parse_ts(s: &str) -> Option<DateTime<FixedOffset>> {
927    DateTime::parse_from_rfc3339(s.trim()).ok()
928}
929
930/// Render a timestamp the same way `serde_json` renders an `IndexRecord`
931/// timestamp (RFC3339, `Z` for UTC, sub-seconds preserved) so the md
932/// frontmatter and the jsonl agree byte-for-byte.
933fn fmt_ts(ts: &DateTime<FixedOffset>) -> String {
934    ts.to_rfc3339_opts(SecondsFormat::AutoSi, true)
935}
936
937/// Max `updated` over an iterator of optional timestamps.
938fn max_updated<'a>(
939    it: impl Iterator<Item = Option<&'a DateTime<FixedOffset>>>,
940) -> Option<DateTime<FixedOffset>> {
941    let mut best: Option<DateTime<FixedOffset>> = None;
942    for ts in it.flatten() {
943        best = Some(match best {
944            Some(cur) if cur >= *ts => cur,
945            _ => *ts,
946        });
947    }
948    best
949}
950
951/// Read a type-folder's `index.jsonl` into records, applying last-write-wins by
952/// `path` over any un-compacted lines (so a half-compacted jsonl still reads
953/// cleanly). Missing file → empty set. Returns records in canonical order.
954fn read_jsonl_records(jsonl: &Path) -> crate::Result<Vec<IndexRecord>> {
955    let text = match fs::read_to_string(jsonl) {
956        Ok(t) => t,
957        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
958        Err(e) => return Err(e.into()),
959    };
960    // Last-write-wins by path; preserve only the final occurrence.
961    let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
962    for (i, line) in text.lines().enumerate() {
963        if line.trim().is_empty() {
964            continue;
965        }
966        let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
967            crate::Error::Store(crate::store::StoreError::BadTypeIndex {
968                path: jsonl.to_path_buf(),
969                message: format!("line {}: {e}", i + 1),
970            })
971        })?;
972        by_path.insert(rec.path.clone(), rec);
973    }
974    let mut records: Vec<IndexRecord> = by_path.into_values().collect();
975    sort_records(&mut records);
976    Ok(records)
977}
978
979/// Count the distinct content files a type-folder's `index.jsonl` catalogs —
980/// the **loop-path** count primitive, the rollup analogue of reading the
981/// per-folder sidecar. It reads only the one small sidecar (one line per file),
982/// never the content tree, so a rollup recompute over `K` type-folders is
983/// `O(K · folder)` sidecar reads — never `O(store files)` like
984/// [`walk_type_folder_files`]. Distinct-`path` (last-write-wins) so the count is
985/// byte-identical to [`read_jsonl_records`]`.len()` even on a half-compacted
986/// jsonl; a missing sidecar is `0`. Within the loop and within
987/// [`Index::rebuild_all`] the folder's jsonl is always rewritten before its
988/// parents are rolled up, so this equals `walk_type_folder_files(folder).len()`.
989fn jsonl_record_count(jsonl: &Path) -> crate::Result<usize> {
990    let text = match fs::read_to_string(jsonl) {
991        Ok(t) => t,
992        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(0),
993        Err(e) => return Err(e.into()),
994    };
995    let mut paths: BTreeSet<PathBuf> = BTreeSet::new();
996    for (i, line) in text.lines().enumerate() {
997        if line.trim().is_empty() {
998            continue;
999        }
1000        let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
1001            crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1002                path: jsonl.to_path_buf(),
1003                message: format!("line {}: {e}", i + 1),
1004            })
1005        })?;
1006        paths.insert(rec.path);
1007    }
1008    Ok(paths.len())
1009}
1010
1011/// Per-child rollup counts for `layers`, read from each type-folder's on-disk
1012/// `index.jsonl` (via [`jsonl_record_count`]) rather than walked from the
1013/// content tree. The **loop-path** counterpart to the from-scratch counting in
1014/// [`Index::build_layer`] / [`Index::build_root`]: it keeps [`update_parents`]
1015/// `O(type-folders)` so a single write never re-enumerates the whole store.
1016fn child_counts_from_jsonl(
1017    store: &Store,
1018    layers: &[Layer],
1019) -> crate::Result<BTreeMap<PathBuf, usize>> {
1020    let mut child_counts = BTreeMap::new();
1021    for &layer in layers {
1022        for tf in type_folders_in_layer(store, layer) {
1023            let n = jsonl_record_count(&store.root.join(&tf).join("index.jsonl"))?;
1024            if n > 0 {
1025                child_counts.insert(tf, n);
1026            }
1027        }
1028    }
1029    Ok(child_counts)
1030}
1031
1032/// Walk a type-folder's `.md` content files, recursing through date-shards,
1033/// excluding the `index.md` artifact itself and any hidden entries.
1034fn walk_type_folder_files(folder_abs: &Path) -> Vec<PathBuf> {
1035    let mut out = Vec::new();
1036    if !folder_abs.is_dir() {
1037        return out;
1038    }
1039    for entry in walkdir::WalkDir::new(folder_abs)
1040        .into_iter()
1041        .filter_entry(|e| !is_hidden(e.file_name()))
1042        .filter_map(|e| e.ok())
1043    {
1044        if !entry.file_type().is_file() {
1045            continue;
1046        }
1047        let p = entry.path();
1048        if p.extension().and_then(|e| e.to_str()) != Some("md") {
1049            continue;
1050        }
1051        if p.file_name().and_then(|n| n.to_str()) == Some("index.md") {
1052            continue;
1053        }
1054        out.push(p.to_path_buf());
1055    }
1056    out
1057}
1058
1059/// The immediate type-folders under a layer (one directory level below the
1060/// layer dir), as store-relative paths. Hidden dirs and `log/` are skipped.
1061fn type_folders_in_layer(store: &Store, layer: Layer) -> Vec<PathBuf> {
1062    let layer_dir = store.root.join(layer_dir_name(layer));
1063    let mut out = Vec::new();
1064    let rd = match fs::read_dir(&layer_dir) {
1065        Ok(rd) => rd,
1066        Err(_) => return out,
1067    };
1068    for entry in rd.flatten() {
1069        if !entry.path().is_dir() {
1070            continue;
1071        }
1072        let name = entry.file_name();
1073        let name = match name.to_str() {
1074            Some(n) => n,
1075            None => continue,
1076        };
1077        if is_hidden(entry.file_name().as_os_str()) || name == "log" {
1078            continue;
1079        }
1080        out.push(PathBuf::from(layer_dir_name(layer)).join(name));
1081    }
1082    out.sort();
1083    out
1084}
1085
1086/// The type-folder a content file belongs to: `<layer>/<type>` (the first two
1087/// path components), or `None` if the path is not under a known layer with at
1088/// least a type segment.
1089fn type_folder_of(file_rel: &Path) -> Option<PathBuf> {
1090    let mut comps = file_rel.components();
1091    let layer = comps.next()?.as_os_str().to_str()?;
1092    layer_from_dir_name(layer)?;
1093    let type_seg = comps.next()?.as_os_str().to_str()?;
1094    Some(PathBuf::from(layer).join(type_seg))
1095}
1096
/// Express `abs` relative to `root`. Returns `None` when `abs` does not start
/// with `root`.
fn rel_to_store(root: &Path, abs: &Path) -> Option<PathBuf> {
    match abs.strip_prefix(root) {
        Ok(rel) => Some(rel.to_path_buf()),
        Err(_) => None,
    }
}
1101
1102/// Normalize a possibly-absolute or `./`-prefixed path to a clean
1103/// store-relative form (drops a leading `./`; leaves already-relative paths).
1104fn normalize_rel(p: &Path) -> PathBuf {
1105    let s = path_to_unix(p);
1106    let s = s.strip_prefix("./").unwrap_or(&s);
1107    PathBuf::from(s)
1108}
1109
/// Whether the final component of `p` is exactly `index.md` or `index.jsonl`,
/// the two catalog files this module writes.
fn is_index_artifact(p: &Path) -> bool {
    let Some(name) = p.file_name().and_then(|n| n.to_str()) else {
        return false;
    };
    name == "index.md" || name == "index.jsonl"
}
1116
/// Whether `name` is a dot-prefixed (hidden) entry name. A name that is not
/// valid UTF-8 is treated as not hidden.
fn is_hidden(name: &std::ffi::OsStr) -> bool {
    matches!(name.to_str(), Some(text) if text.starts_with('.'))
}
1120
1121fn layer_dir_name(layer: Layer) -> &'static str {
1122    match layer {
1123        Layer::Sources => "sources",
1124        Layer::Records => "records",
1125        Layer::Wiki => "wiki",
1126    }
1127}
1128
1129/// Local layer-name parse. Mirrors the contract of [`Layer::from_dir_name`];
1130/// kept local to keep this module's walk self-contained (see the module header).
1131fn layer_from_dir_name(name: &str) -> Option<Layer> {
1132    match name {
1133        "sources" => Some(Layer::Sources),
1134        "records" => Some(Layer::Records),
1135        "wiki" => Some(Layer::Wiki),
1136        _ => None,
1137    }
1138}
1139
/// The last component of `p` as text, or `""` when the path has no final
/// component or that component is not valid UTF-8.
fn folder_basename(p: &Path) -> &str {
    match p.file_name() {
        Some(name) => name.to_str().unwrap_or(""),
        None => "",
    }
}
1144
1145/// The canonical wiki-link target for a content path: the store-relative path
1146/// with `/` separators and the trailing `.md` stripped (the bare form the
1147/// `index.md` browse view links to).
1148fn wiki_target(p: &Path) -> String {
1149    let unix = path_to_unix(p);
1150    unix.strip_suffix(".md").unwrap_or(&unix).to_string()
1151}
1152
/// Join the components of `p` with `/`, independent of the host separator, so
/// rendered paths look the same on every platform. Components that are not
/// valid UTF-8 are left out.
fn path_to_unix(p: &Path) -> String {
    let mut unix = String::new();
    let parts = p.components().filter_map(|c| c.as_os_str().to_str());
    for (position, part) in parts.enumerate() {
        if position > 0 {
            unix.push('/');
        }
        unix.push_str(part);
    }
    unix
}
1161
1162/// Serde for [`IndexRecord::path`]: always forward-slash on the wire, so the
1163/// `index.jsonl` catalog is identical whether the store was written on POSIX or
1164/// Windows (a git clone across OSes yields the same paths, and the last-write-
1165/// wins upsert key never splits on separator style). On POSIX this matches the
1166/// default `PathBuf` serialization; on Windows it rewrites `\` to `/`.
1167mod path_serde {
1168    use super::path_to_unix;
1169    use serde::{Deserialize, Deserializer, Serializer};
1170    use std::path::{Path, PathBuf};
1171
1172    pub fn serialize<S: Serializer>(p: &Path, s: S) -> Result<S::Ok, S::Error> {
1173        s.serialize_str(&path_to_unix(p))
1174    }
1175
1176    pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<PathBuf, D::Error> {
1177        Ok(PathBuf::from(String::deserialize(d)?))
1178    }
1179}
1180
/// Upper-case the first character of `s` and leave the rest untouched.
///
/// The conversion is `char::to_uppercase`, so it is Unicode-aware and may
/// expand one character into several (for example `ß` becomes `SS`). An empty
/// input yields an empty string.
fn capitalize(s: &str) -> String {
    let Some(first) = s.chars().next() else {
        return String::new();
    };
    let mut out = String::with_capacity(s.len());
    out.extend(first.to_uppercase());
    out.push_str(&s[first.len_utf8()..]);
    out
}
1189
/// Flatten `s` to a single line and cap it at `max` characters.
///
/// Runs of whitespace (including newlines) collapse to one space and leading /
/// trailing whitespace is dropped. The cap counts `char`s, not bytes, so the
/// cut always falls on a character boundary. No ellipsis is appended.
fn truncate(s: &str, max: usize) -> String {
    let mut flat = s.split_whitespace().collect::<Vec<_>>().join(" ");
    // The byte offset of character number `max` (if any) is where to cut.
    if let Some((cut, _)) = flat.char_indices().nth(max) {
        flat.truncate(cut);
    }
    flat
}
1199
1200/// Atomic (rename-based) write for the **derived** catalog (`index.md` /
1201/// `index.jsonl`). Deliberately NOT `fsync`-durable like [`crate::fsx`]: the
1202/// index is rebuildable (`dbmd index rebuild`) and this is the O(changed)
1203/// write-through path, so a per-write `fsync` would be cost without benefit — a
1204/// crash-lost catalog write is recovered by a rebuild, not data loss. (Primary
1205/// data — content records, `log.md` — uses the durable `crate::fsx` path.)
1206fn write_atomic(path: &Path, contents: String) -> crate::Result<()> {
1207    if let Some(parent) = path.parent() {
1208        fs::create_dir_all(parent)?;
1209    }
1210    let dir = path.parent().unwrap_or_else(|| Path::new("."));
1211    let mut tmp = tempfile_in(dir)?;
1212    tmp.write_all(contents.as_bytes())?;
1213    tmp.flush()?;
1214    tmp.persist(path)?;
1215    Ok(())
1216}
1217
1218fn remove_if_exists(path: &Path) -> crate::Result<()> {
1219    match fs::remove_file(path) {
1220        Ok(()) => Ok(()),
1221        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
1222        Err(e) => Err(e.into()),
1223    }
1224}
1225
1226fn bad_index(path: &Path, msg: &str) -> crate::Error {
1227    crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1228        path: path.to_path_buf(),
1229        message: msg.to_string(),
1230    })
1231}
1232
// Hand-rolled temp-file-then-rename helper. The `tempfile` crate is only a
// dev-dependency (used by the tests), so the library path carries this small
// equivalent instead of adding it to the regular dependency set. The handle
// sits in an `Option` so `persist` can take and close it even though the type
// implements `Drop`; `Drop` itself only removes a temp file that was never
// persisted.
struct AtomicTemp {
    file: Option<fs::File>,
    path: PathBuf,
    persisted: bool,
}

impl AtomicTemp {
    /// The open temp-file handle. Panics if the handle has already been taken.
    fn handle(&mut self) -> &mut fs::File {
        self.file.as_mut().expect("temp file open")
    }

    /// Write all of `bytes` to the temp file.
    fn write_all(&mut self, bytes: &[u8]) -> std::io::Result<()> {
        self.handle().write_all(bytes)
    }

    /// Flush the temp file's writer.
    fn flush(&mut self) -> std::io::Result<()> {
        self.handle().flush()
    }

    /// Close the temp file and rename it to `dest`. If the rename fails, the
    /// value is still marked un-persisted, so dropping it removes the temp file.
    fn persist(mut self, dest: &Path) -> std::io::Result<()> {
        if let Some(handle) = self.file.take() {
            // Best-effort sync; a failure here does not abort the rename.
            let _ = handle.sync_all();
            // Close the handle before renaming.
            drop(handle);
        }
        fs::rename(&self.path, dest)?;
        self.persisted = true;
        Ok(())
    }
}

impl Drop for AtomicTemp {
    fn drop(&mut self) {
        if self.persisted {
            return;
        }
        // An error path bailed out before `persist`: clean up, best effort.
        let _ = fs::remove_file(&self.path);
    }
}
1270
1271fn tempfile_in(dir: &Path) -> std::io::Result<AtomicTemp> {
1272    use std::time::{SystemTime, UNIX_EPOCH};
1273    let nanos = SystemTime::now()
1274        .duration_since(UNIX_EPOCH)
1275        .map(|d| d.as_nanos())
1276        .unwrap_or(0);
1277    let pid = std::process::id();
1278    // Monotonic-ish unique suffix; the dir is the destination dir so rename is
1279    // same-filesystem and therefore atomic.
1280    let counter = next_temp_counter();
1281    let name = format!(".dbmd-index-{pid}-{nanos}-{counter}.tmp");
1282    let path = dir.join(name);
1283    let file = fs::OpenOptions::new()
1284        .write(true)
1285        .create_new(true)
1286        .open(&path)?;
1287    Ok(AtomicTemp {
1288        file: Some(file),
1289        path,
1290        persisted: false,
1291    })
1292}
1293
/// Hand out the next value of a process-wide counter, starting at `0` and
/// increasing by one per call.
fn next_temp_counter() -> u64 {
    use std::sync::atomic::{AtomicU64, Ordering};

    static NEXT: AtomicU64 = AtomicU64::new(0);
    NEXT.fetch_add(1, Ordering::Relaxed)
}
1299
1300#[cfg(test)]
1301mod tests {
1302    use super::*;
1303    use std::collections::BTreeSet;
1304    use std::fs;
1305    use tempfile::TempDir;
1306
1307    // ── fixtures ─────────────────────────────────────────────────────────
1308
1309    /// A temp store with a `DB.md` marker. `store.config` is the parser default
1310    /// (these tests never exercise the config parser).
1311    fn mk_store() -> (TempDir, Store) {
1312        let dir = TempDir::new().unwrap();
1313        fs::write(dir.path().join("DB.md"), "# test store\n").unwrap();
1314        let store = Store {
1315            root: dir.path().to_path_buf(),
1316            config: crate::parser::Config::default(),
1317        };
1318        (dir, store)
1319    }
1320
1321    /// Write a content file at `rel` with the given frontmatter lines + body.
1322    /// `fm` is the raw YAML body between the fences (no `---`).
1323    fn write_raw(store: &Store, rel: &str, fm: &str, body: &str) {
1324        let abs = store.root.join(rel);
1325        fs::create_dir_all(abs.parent().unwrap()).unwrap();
1326        fs::write(&abs, format!("---\n{fm}\n---\n{body}")).unwrap();
1327    }
1328
1329    /// Convenience: write a typed content file with summary/updated/extras.
1330    fn write_doc(
1331        store: &Store,
1332        rel: &str,
1333        type_: &str,
1334        summary: Option<&str>,
1335        updated: Option<&str>,
1336        extra_yaml: &str,
1337    ) {
1338        let mut fm = format!("type: {type_}\n");
1339        if let Some(s) = summary {
1340            fm.push_str(&format!("summary: {s}\n"));
1341        }
1342        if let Some(u) = updated {
1343            fm.push_str(&format!("updated: {u}\n"));
1344        }
1345        fm.push_str(extra_yaml);
1346        write_raw(store, rel, fm.trim_end(), "\nbody text\n");
1347    }
1348
1349    fn read(store: &Store, rel: &str) -> String {
1350        fs::read_to_string(store.root.join(rel)).unwrap()
1351    }
1352
1353    fn exists(store: &Store, rel: &str) -> bool {
1354        store.root.join(rel).exists()
1355    }
1356
1357    /// Collect every `index.md` + `index.jsonl` under the store, mapped to its
1358    /// bytes — the surface the byte-identity invariant compares.
1359    fn snapshot_artifacts(store: &Store) -> BTreeMap<String, String> {
1360        let mut out = BTreeMap::new();
1361        for entry in walkdir::WalkDir::new(&store.root)
1362            .into_iter()
1363            .filter_map(|e| e.ok())
1364        {
1365            let p = entry.path();
1366            if is_index_artifact(p) {
1367                let rel = path_to_unix(&rel_to_store(&store.root, p).unwrap());
1368                out.insert(rel, fs::read_to_string(p).unwrap());
1369            }
1370        }
1371        out
1372    }
1373
1374    // ── build_type_folder + to_markdown ──────────────────────────────────
1375
1376    #[test]
1377    fn type_folder_aggregates_across_shards_in_recency_order() {
1378        let (_d, store) = mk_store();
1379        // Three emails across two month-shards, deliberately written
1380        // out-of-recency-order on disk.
1381        write_doc(
1382            &store,
1383            "sources/emails/2026/05/b-old.md",
1384            "email",
1385            Some("Older mail"),
1386            Some("2026-05-01T09:00:00Z"),
1387            "",
1388        );
1389        write_doc(
1390            &store,
1391            "sources/emails/2026/06/c-new.md",
1392            "email",
1393            Some("Newest mail"),
1394            Some("2026-06-15T12:00:00Z"),
1395            "",
1396        );
1397        write_doc(
1398            &store,
1399            "sources/emails/2026/05/a-mid.md",
1400            "email",
1401            Some("Middle mail"),
1402            Some("2026-05-20T08:00:00Z"),
1403            "",
1404        );
1405
1406        let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
1407        let paths: Vec<String> = idx.records.iter().map(|r| path_to_unix(&r.path)).collect();
1408        assert_eq!(
1409            paths,
1410            vec![
1411                "sources/emails/2026/06/c-new.md",
1412                "sources/emails/2026/05/a-mid.md",
1413                "sources/emails/2026/05/b-old.md",
1414            ],
1415            "records must aggregate across shards, newest `updated` first"
1416        );
1417    }
1418
1419    #[test]
1420    fn type_folder_md_format_entries_tags_and_derived_updated() {
1421        let (_d, store) = mk_store();
1422        write_doc(
1423            &store,
1424            "records/contacts/sarah-chen.md",
1425            "contact",
1426            Some("Renewal champion at Acme"),
1427            Some("2026-05-27T10:00:00Z"),
1428            "tags:\n  - renewal\n  - acme\n",
1429        );
1430        write_doc(
1431            &store,
1432            "records/contacts/no-tags.md",
1433            "contact",
1434            Some("Plain contact"),
1435            Some("2026-05-26T10:00:00Z"),
1436            "",
1437        );
1438
1439        let idx = Index::build_type_folder(&store, Path::new("records/contacts")).unwrap();
1440        let md = idx.to_markdown();
1441
1442        // Frontmatter is exact and the index's own `updated` is the MAX member
1443        // updated (the determinism the byte-identity invariant rests on).
1444        assert!(md.starts_with(
1445            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\nupdated: 2026-05-27T10:00:00Z\n---\n\n# records/contacts\n"
1446        ), "frontmatter/heading wrong:\n{md}");
1447
1448        // Entry with tags: `— summary  ·  #tag #tag`.
1449        assert!(
1450            md.contains(
1451                "- [[records/contacts/sarah-chen]] — Renewal champion at Acme  ·  #renewal #acme\n"
1452            ),
1453            "tagged entry wrong:\n{md}"
1454        );
1455        // Entry without tags omits the `  ·  ` suffix entirely.
1456        assert!(
1457            md.contains("- [[records/contacts/no-tags]] — Plain contact\n"),
1458            "untagged entry wrong:\n{md}"
1459        );
1460        assert!(
1461            !md.contains("Plain contact  ·"),
1462            "untagged entry must not emit a tag separator"
1463        );
1464        // No `## More` below the cap.
1465        assert!(!md.contains("## More"), "no footer expected under the cap");
1466    }
1467
1468    #[test]
1469    fn missing_summary_becomes_placeholder_not_invented() {
1470        let (_d, store) = mk_store();
1471        write_doc(
1472            &store,
1473            "records/notes/x.md",
1474            "note",
1475            None,
1476            Some("2026-05-27T10:00:00Z"),
1477            "",
1478        );
1479        let idx = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
1480        assert_eq!(idx.records[0].summary, MISSING_SUMMARY);
1481        let md = idx.to_markdown();
1482        assert!(
1483            md.contains("- [[records/notes/x]] — (no summary)\n"),
1484            "missing summary must render the placeholder, not invent text:\n{md}"
1485        );
1486    }
1487
1488    // ── to_jsonl ─────────────────────────────────────────────────────────
1489
1490    #[test]
1491    fn jsonl_is_complete_structured_and_round_trips() {
1492        let (_d, store) = mk_store();
1493        write_doc(
1494            &store,
1495            "records/expenses/2026/05/e1.md",
1496            "expense",
1497            Some("Lunch with vendor"),
1498            Some("2026-05-10T10:00:00Z"),
1499            "created: 2026-05-10T09:00:00Z\nstatus: paid\namount: 42\ncompany: [[records/companies/acme]]\nrelated:\n  - [[wiki/themes/spend]]\ntags:\n  - food\nlinks:\n  - wiki/themes/spend\n  - [[wiki/themes/renewal]]\n",
1500        );
1501        write_doc(
1502            &store,
1503            "records/expenses/2026/06/e2.md",
1504            "expense",
1505            Some("Cloud bill"),
1506            Some("2026-06-01T10:00:00Z"),
1507            "amount: 100\n",
1508        );
1509
1510        let idx = Index::build_type_folder(&store, Path::new("records/expenses")).unwrap();
1511        let jsonl = idx.to_jsonl();
1512        let lines: Vec<&str> = jsonl.lines().collect();
1513        assert_eq!(lines.len(), 2, "one JSON object per file, uncapped");
1514
1515        // Newest first (e2), and each line parses back to an equal record.
1516        let r0: IndexRecord = serde_json::from_str(lines[0]).unwrap();
1517        assert_eq!(path_to_unix(&r0.path), "records/expenses/2026/06/e2.md");
1518        assert_eq!(
1519            r0, idx.records[0],
1520            "jsonl line must round-trip to the record"
1521        );
1522
1523        // The first (data) record carries every reserved field + the extras in
1524        // `fields` (status/amount), and links/tags verbatim.
1525        let r1: IndexRecord = serde_json::from_str(lines[1]).unwrap();
1526        assert_eq!(r1.type_, "expense");
1527        assert_eq!(r1.summary, "Lunch with vendor");
1528        assert_eq!(r1.tags, vec!["food".to_string()]);
1529        assert_eq!(
1530            r1.links,
1531            vec![
1532                "wiki/themes/spend".to_string(),
1533                "[[wiki/themes/renewal]]".to_string()
1534            ]
1535        );
1536        assert_eq!(
1537            r1.created,
1538            Some(DateTime::parse_from_rfc3339("2026-05-10T09:00:00Z").unwrap())
1539        );
1540        assert_eq!(r1.fields.get("status"), Some(&Value::from("paid")));
1541        assert_eq!(r1.fields.get("amount"), Some(&Value::from(42)));
1542        assert_eq!(
1543            r1.fields.get("company"),
1544            Some(&Value::from("[[records/companies/acme]]"))
1545        );
1546        assert_eq!(
1547            r1.fields.get("related"),
1548            Some(&serde_json::json!(["[[wiki/themes/spend]]"]))
1549        );
1550        // Reserved keys never leak into `fields`.
1551        for reserved in [
1552            "path", "type", "summary", "tags", "links", "created", "updated",
1553        ] {
1554            assert!(
1555                !r1.fields.contains_key(reserved),
1556                "reserved key {reserved} must not appear in fields"
1557            );
1558        }
1559
1560        // Stable key order: declared fields first, then sorted extras.
1561        assert!(
1562            lines[1].starts_with(
1563                r#"{"path":"records/expenses/2026/05/e1.md","type":"expense","summary":"Lunch with vendor","tags":["food"],"links":["wiki/themes/spend","[[wiki/themes/renewal]]"],"created":"2026-05-10T09:00:00Z","updated":"2026-05-10T10:00:00Z","#
1564            ),
1565            "jsonl key order not stable:\n{}",
1566            lines[1]
1567        );
1568        // The flattened extras come in BTreeMap (sorted) order.
1569        assert!(
1570            lines[1].ends_with(r#""amount":42,"company":"[[records/companies/acme]]","related":["[[wiki/themes/spend]]"],"status":"paid"}"#),
1571            "extras must be sorted:\n{}",
1572            lines[1]
1573        );
1574    }
1575
1576    // ── cap + footer ─────────────────────────────────────────────────────
1577
1578    #[test]
1579    fn over_cap_md_shows_500_plus_footer_jsonl_holds_all() {
1580        let (_d, store) = mk_store();
1581        let total = MD_CAP + 7;
1582        for i in 0..total {
1583            // Distinct, monotonically increasing `updated` so order is total.
1584            let day = 1 + (i % 27);
1585            let rel = format!("sources/emails/2026/05/m-{i:04}.md");
1586            let updated = format!("2026-05-{day:02}T00:00:{:02}Z", i % 60);
1587            write_doc(
1588                &store,
1589                &rel,
1590                "email",
1591                Some(&format!("mail {i}")),
1592                Some(&updated),
1593                "",
1594            );
1595        }
1596        let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
1597        assert_eq!(idx.records.len(), total, "jsonl/records keep every file");
1598
1599        let md = idx.to_markdown();
1600        let entry_lines = md.lines().filter(|l| l.starts_with("- [[")).count();
1601        assert_eq!(entry_lines, MD_CAP, "md browse view is capped at 500");
1602
1603        assert!(
1604            md.contains("## More\n\n"),
1605            "over-cap md needs a More footer"
1606        );
1607        assert!(
1608            md.contains(&format!(
1609                "This folder has {total} files. The 500 most recent are listed above.\n"
1610            )),
1611            "footer count wrong:\n{md}"
1612        );
1613        assert!(
1614            md.contains(
1615                "Use `dbmd index query --type email --in sources` for the complete catalog.\n"
1616            ),
1617            "footer must infer type=email layer=sources:\n{md}"
1618        );
1619
1620        let jsonl = idx.to_jsonl();
1621        assert_eq!(jsonl.lines().count(), total, "jsonl is uncapped");
1622    }
1623
1624    // ── sort total order ─────────────────────────────────────────────────
1625
1626    #[test]
1627    fn sort_breaks_ties_by_path_and_puts_undated_last() {
1628        let mut recs = vec![
1629            rec("z/a.md", Some("2026-05-01T00:00:00Z")),
1630            rec("a/b.md", Some("2026-05-01T00:00:00Z")), // same updated, path < z/a
1631            rec("m/c.md", None),                         // undated → last
1632            rec("b/d.md", Some("2026-06-01T00:00:00Z")), // newest
1633        ];
1634        sort_records(&mut recs);
1635        let order: Vec<String> = recs.iter().map(|r| path_to_unix(&r.path)).collect();
1636        assert_eq!(order, vec!["b/d.md", "a/b.md", "z/a.md", "m/c.md"]);
1637    }
1638
1639    fn rec(path: &str, updated: Option<&str>) -> IndexRecord {
1640        IndexRecord {
1641            path: PathBuf::from(path),
1642            type_: "t".into(),
1643            summary: "s".into(),
1644            tags: vec![],
1645            links: vec![],
1646            created: None,
1647            updated: updated.map(|u| DateTime::parse_from_rfc3339(u).unwrap()),
1648            fields: BTreeMap::new(),
1649        }
1650    }
1651
1652    // ── build_layer / build_root ─────────────────────────────────────────
1653
1654    #[test]
1655    fn layer_index_lists_type_folders_with_counts_and_preview() {
1656        let (_d, store) = mk_store();
1657        write_doc(
1658            &store,
1659            "records/contacts/a.md",
1660            "contact",
1661            Some("Contact A older"),
1662            Some("2026-05-01T00:00:00Z"),
1663            "",
1664        );
1665        write_doc(
1666            &store,
1667            "records/contacts/b.md",
1668            "contact",
1669            Some("Contact B newest"),
1670            Some("2026-05-09T00:00:00Z"),
1671            "",
1672        );
1673        write_doc(
1674            &store,
1675            "records/companies/x.md",
1676            "company",
1677            Some("Acme Inc"),
1678            Some("2026-05-05T00:00:00Z"),
1679            "",
1680        );
1681        // build the type-folder artifacts first (layer preview reads their jsonl)
1682        Index::write_level(&store, &IndexLevel::TypeFolder("records/contacts".into())).unwrap();
1683        Index::write_level(&store, &IndexLevel::TypeFolder("records/companies".into())).unwrap();
1684
1685        Index::write_level(&store, &IndexLevel::Layer(Layer::Records)).unwrap();
1686        let md = read(&store, "records/index.md");
1687
1688        assert!(
1689            md.starts_with("---\ntype: index\nscope: layer\nfolder: records\n"),
1690            "layer fm:\n{md}"
1691        );
1692        // Alphabetical type-folder order: companies before contacts.
1693        let companies_at = md.find("companies/index").unwrap();
1694        let contacts_at = md.find("contacts/index").unwrap();
1695        assert!(
1696            companies_at < contacts_at,
1697            "type folders must be alphabetical"
1698        );
1699        // Count + display + newest-summary preview.
1700        assert!(
1701            md.contains("- [[records/contacts/index|Contacts]] (2) — Contact B newest\n"),
1702            "contacts entry:\n{md}"
1703        );
1704        assert!(
1705            md.contains("- [[records/companies/index|Companies]] (1) — Acme Inc\n"),
1706            "companies entry:\n{md}"
1707        );
1708        // Layer `updated` is the max across children (contacts b = 05-09).
1709        assert!(
1710            md.contains("updated: 2026-05-09T00:00:00Z\n"),
1711            "layer updated must be max child:\n{md}"
1712        );
1713    }
1714
1715    #[test]
1716    fn root_index_groups_layers_with_totals_and_per_type_counts() {
1717        let (_d, store) = mk_store();
1718        write_doc(
1719            &store,
1720            "sources/emails/2026/05/a.md",
1721            "email",
1722            Some("Mail"),
1723            Some("2026-05-01T00:00:00Z"),
1724            "",
1725        );
1726        write_doc(
1727            &store,
1728            "sources/docs/d.md",
1729            "doc",
1730            Some("Doc"),
1731            Some("2026-05-02T00:00:00Z"),
1732            "",
1733        );
1734        write_doc(
1735            &store,
1736            "records/contacts/c.md",
1737            "contact",
1738            Some("C"),
1739            Some("2026-05-03T00:00:00Z"),
1740            "",
1741        );
1742        // wiki empty → no Wiki section
1743
1744        Index::rebuild_all(&store).unwrap();
1745        let md = read(&store, "index.md");
1746
1747        assert!(
1748            md.starts_with("---\ntype: index\nscope: root\n"),
1749            "root fm:\n{md}"
1750        );
1751        assert!(md.contains("# Knowledge base index\n"), "root title:\n{md}");
1752        // Layer heading with total count; Sources before Records (canonical).
1753        let sources_h = md
1754            .find("## Sources (2)")
1755            .expect("sources heading w/ total 2");
1756        let records_h = md
1757            .find("## Records (1)")
1758            .expect("records heading w/ total 1");
1759        assert!(sources_h < records_h, "Sources must precede Records");
1760        assert!(!md.contains("## Wiki"), "empty layer gets no section");
1761        // Per-type sub-entries with (N), no preview at root.
1762        assert!(
1763            md.contains("- [[sources/docs/index|Docs]] (1)\n"),
1764            "root docs entry:\n{md}"
1765        );
1766        assert!(
1767            md.contains("- [[sources/emails/index|Emails]] (1)\n"),
1768            "root emails entry:\n{md}"
1769        );
1770        assert!(
1771            md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
1772            "root contacts entry:\n{md}"
1773        );
1774        assert!(!md.contains("— "), "root entries carry no preview text");
1775    }
1776
1777    // ── write-through == rebuild (THE invariant) ─────────────────────────
1778
1779    #[test]
1780    fn on_write_matches_rebuild_byte_for_byte() {
1781        // Build a store incrementally via on_write, and a second identical store
1782        // via a single rebuild_all, then assert every index artifact is equal.
1783        let (_d1, wt) = mk_store();
1784        let (_d2, rb) = mk_store();
1785
1786        let docs: &[(&str, &str, &str, &str, &str)] = &[
1787            (
1788                "sources/emails/2026/05/e1.md",
1789                "email",
1790                "First mail",
1791                "2026-05-01T10:00:00Z",
1792                "tags:\n  - inbox\n",
1793            ),
1794            (
1795                "sources/emails/2026/06/e2.md",
1796                "email",
1797                "Second mail",
1798                "2026-06-01T10:00:00Z",
1799                "",
1800            ),
1801            (
1802                "records/contacts/sarah.md",
1803                "contact",
1804                "Sarah",
1805                "2026-05-15T10:00:00Z",
1806                "links:\n  - wiki/people/sarah\n",
1807            ),
1808            (
1809                "records/contacts/elena.md",
1810                "contact",
1811                "Elena",
1812                "2026-05-20T10:00:00Z",
1813                "status: active\n",
1814            ),
1815            (
1816                "wiki/people/sarah.md",
1817                "wiki-page",
1818                "Sarah bio",
1819                "2026-05-21T10:00:00Z",
1820                "",
1821            ),
1822        ];
1823
1824        for (rel, t, sum, upd, extra) in docs {
1825            write_doc(&wt, rel, t, Some(sum), Some(upd), extra);
1826            write_doc(&rb, rel, t, Some(sum), Some(upd), extra);
1827            Index::on_write(&wt, Path::new(rel)).unwrap();
1828        }
1829        Index::rebuild_all(&rb).unwrap();
1830
1831        let a = snapshot_artifacts(&wt);
1832        let b = snapshot_artifacts(&rb);
1833        assert_eq!(
1834            a.keys().collect::<Vec<_>>(),
1835            b.keys().collect::<Vec<_>>(),
1836            "same set of index artifacts must exist"
1837        );
1838        for (k, v) in &a {
1839            assert_eq!(v, &b[k], "artifact {k} differs between write-through and rebuild:\n--- write-through ---\n{v}\n--- rebuild ---\n{}", b[k]);
1840        }
1841        // Sanity: artifacts actually exist (not a vacuous comparison of empties).
1842        assert!(a.contains_key("index.md"));
1843        assert!(a.contains_key("sources/emails/index.jsonl"));
1844        assert!(a.contains_key("records/contacts/index.md"));
1845    }
1846
1847    /// Regression (O(changed) bound, not just correctness): a loop op must
1848    /// recompute its parent rollups from the type-folder `index.jsonl` sidecars
1849    /// — never by walking the content tree of *sibling* folders it wasn't asked
1850    /// about. The byte-identity property test (which always indexes every folder
1851    /// before comparing) can't catch a violation, because a full-store walk
1852    /// produces the *correct* counts too; it just does so in `O(store files)`.
1853    ///
1854    /// The behavioral fingerprint of the old `update_parents → build_layer /
1855    /// build_root` (which called `walk_type_folder_files` on every type-folder in
1856    /// the store): a single `on_write` to `records/contacts/sarah.md` would
1857    /// surface, in the layer + root rollups, the file count of
1858    /// `records/companies` — a sibling that has content on disk but was NEVER
1859    /// passed to a write/index op, so it has no `index.jsonl`. An O(changed) loop
1860    /// op cannot "see" that un-indexed folder; a whole-store walk can. So this
1861    /// asserts the rollups reflect ONLY the sidecar-indexed folder, proving no
1862    /// content-tree walk happened.
1863    #[test]
1864    fn loop_op_does_not_walk_sibling_content_tree() {
1865        let (_d, store) = mk_store();
1866
1867        // A sibling type-folder with real content on disk, but deliberately
1868        // never indexed (no on_write / write_level / rebuild over it) ⇒ no
1869        // `records/companies/index.jsonl` exists.
1870        write_doc(
1871            &store,
1872            "records/companies/acme.md",
1873            "company",
1874            Some("Acme Inc"),
1875            Some("2026-05-05T00:00:00Z"),
1876            "",
1877        );
1878        write_doc(
1879            &store,
1880            "records/companies/globex.md",
1881            "company",
1882            Some("Globex"),
1883            Some("2026-05-06T00:00:00Z"),
1884            "",
1885        );
1886        assert!(
1887            !exists(&store, "records/companies/index.jsonl"),
1888            "precondition: companies must be un-indexed"
1889        );
1890
1891        // The ONLY loop op: a single write to a different type-folder.
1892        write_doc(
1893            &store,
1894            "records/contacts/sarah.md",
1895            "contact",
1896            Some("Sarah"),
1897            Some("2026-05-15T00:00:00Z"),
1898            "",
1899        );
1900        Index::on_write(&store, Path::new("records/contacts/sarah.md")).unwrap();
1901
1902        // The written folder is reflected in both rollups...
1903        let layer_md = read(&store, "records/index.md");
1904        let root_md = read(&store, "index.md");
1905        // (layer rollup appends a summary preview, root does not)
1906        assert!(
1907            layer_md.contains("- [[records/contacts/index|Contacts]] (1) — Sarah\n"),
1908            "layer must reflect the written folder:\n{layer_md}"
1909        );
1910        assert!(
1911            root_md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
1912            "root must reflect the written folder:\n{root_md}"
1913        );
1914
1915        // ...but the un-indexed sibling must be INVISIBLE to a loop op. If the
1916        // rollups mention `records/companies` at all, `on_write` walked the whole
1917        // content tree — the O(store) regression.
1918        assert!(
1919            !layer_md.contains("companies"),
1920            "loop op walked the sibling content tree: layer rollup counts un-indexed records/companies\n{layer_md}"
1921        );
1922        assert!(
1923            !root_md.contains("companies"),
1924            "loop op walked the sibling content tree: root rollup counts un-indexed records/companies\n{root_md}"
1925        );
1926        // The layer's only child is contacts ⇒ its total is exactly 1, not 3.
1927        assert!(
1928            root_md.contains("## Records (1)"),
1929            "root layer total must count only the sidecar-indexed folder (1), not walked siblings (would be 3):\n{root_md}"
1930        );
1931
1932        // And the sidecar-derived count IS what a full walk WOULD yield once the
1933        // sibling is indexed too — i.e. the fix changes cost, not the eventual
1934        // result. Index companies, then confirm the rollups now (and only now)
1935        // include it, byte-identical to a from-scratch rebuild.
1936        let (_d2, rb) = mk_store();
1937        for (rel, t, s, u) in [
1938            (
1939                "records/companies/acme.md",
1940                "company",
1941                "Acme Inc",
1942                "2026-05-05T00:00:00Z",
1943            ),
1944            (
1945                "records/companies/globex.md",
1946                "company",
1947                "Globex",
1948                "2026-05-06T00:00:00Z",
1949            ),
1950            (
1951                "records/contacts/sarah.md",
1952                "contact",
1953                "Sarah",
1954                "2026-05-15T00:00:00Z",
1955            ),
1956        ] {
1957            write_doc(&rb, rel, t, Some(s), Some(u), "");
1958        }
1959        Index::on_write(&store, Path::new("records/companies/acme.md")).unwrap();
1960        Index::on_write(&store, Path::new("records/companies/globex.md")).unwrap();
1961        Index::rebuild_all(&rb).unwrap();
1962        let a = snapshot_artifacts(&store);
1963        let b = snapshot_artifacts(&rb);
1964        assert_eq!(
1965            a.keys().collect::<BTreeSet<_>>(),
1966            b.keys().collect::<BTreeSet<_>>(),
1967            "same artifact set after indexing both folders"
1968        );
1969        for (k, v) in &a {
1970            assert_eq!(
1971                v, &b[k],
1972                "after indexing the sibling too, loop result must equal rebuild for {k}"
1973            );
1974        }
1975        assert!(
1976            read(&store, "index.md").contains("## Records (3)"),
1977            "now that both folders are indexed, the root total is 3"
1978        );
1979    }
1980
1981    /// Regression: a wiki-page filed at the path the toolkit ITSELF computes
1982    /// (`Store::shard_path_for`) must be indexable end-to-end. The bug was that
1983    /// `shard_path_for("wiki-page", …)` returned a 2-component `wiki/<file>`
1984    /// path, which `type_folder_of` treats as having no type-folder. That made
1985    /// the producer (path computation) disagree with the consumer (index): the
1986    /// loop path crashed (`on_write` → `Err`, it tried to write `index.md`
1987    /// *inside* a file) while the sweep path silently dropped the page from
1988    /// every catalog. This test drives both paths through the real
1989    /// `shard_path_for` output and asserts (1) `on_write` succeeds, (2) the page
1990    /// appears in the rebuilt catalog, and (3) write-through == rebuild.
1991    #[test]
1992    fn wiki_page_at_shard_path_for_is_indexable_end_to_end() {
1993        let (_d1, wt) = mk_store();
1994        let (_d2, rb) = mk_store();
1995
1996        // The toolkit's own canonical write path for a wiki-page.
1997        let rel = wt
1998            .shard_path_for(
1999                "wiki-page",
2000                &crate::parser::Frontmatter::default(),
2001                "renewal-theme",
2002            )
2003            .unwrap();
2004        let rel_str = path_to_unix(&rel);
2005        // Guard the precondition the consumer requires: 3+ components so
2006        // `type_folder_of` resolves a real `<layer>/<type-folder>`.
2007        assert!(
2008            type_folder_of(&rel).is_some(),
2009            "shard_path_for produced a path the index cannot file: {rel_str}"
2010        );
2011
2012        write_doc(
2013            &wt,
2014            &rel_str,
2015            "wiki-page",
2016            Some("Renewal theme"),
2017            Some("2026-05-21T10:00:00Z"),
2018            "",
2019        );
2020        write_doc(
2021            &rb,
2022            &rel_str,
2023            "wiki-page",
2024            Some("Renewal theme"),
2025            Some("2026-05-21T10:00:00Z"),
2026            "",
2027        );
2028
2029        // (1) Loop path must NOT error (the old `wiki/<file>` shape returned
2030        // Err(Io(NotADirectory))).
2031        Index::on_write(&wt, &rel)
2032            .expect("on_write must succeed for a toolkit-computed wiki-page path");
2033        Index::rebuild_all(&rb).unwrap();
2034
2035        // (2) The page is present in the rebuilt catalog (the old flat-path bug
2036        // silently omitted it from every artifact). The individual page link
2037        // lives in the *type-folder* index; the *layer* index rolls the
2038        // type-folder up — assert both, since the bug erased both.
2039        let page_link = wiki_target(&rel); // wiki/topics/renewal-theme
2040        let tf_md = read(&rb, "wiki/topics/index.md");
2041        assert!(
2042            tf_md.contains(&format!("[[{page_link}]]")),
2043            "type-folder index must list the page link, got:\n{tf_md}"
2044        );
2045        assert!(
2046            exists(&rb, "wiki/topics/index.jsonl"),
2047            "type-folder jsonl must exist"
2048        );
2049        assert!(
2050            read(&rb, "wiki/topics/index.jsonl").contains(&rel_str),
2051            "type-folder jsonl must contain the page row"
2052        );
2053        // The layer index rolls the type-folder up (proves the page's folder is
2054        // visible to the layer catalog, not dropped).
2055        let layer_md = read(&rb, "wiki/index.md");
2056        assert!(
2057            layer_md.contains("wiki/topics/index"),
2058            "layer index must roll up the wiki/topics type-folder, got:\n{layer_md}"
2059        );
2060
2061        // (3) Write-through equals rebuild byte-for-byte — loop and sweep agree.
2062        let a = snapshot_artifacts(&wt);
2063        let b = snapshot_artifacts(&rb);
2064        assert_eq!(
2065            a.keys().collect::<Vec<_>>(),
2066            b.keys().collect::<Vec<_>>(),
2067            "loop and sweep must produce the same artifact set"
2068        );
2069        for (k, v) in &a {
2070            assert_eq!(
2071                v, &b[k],
2072                "wiki-page artifact {k} differs between on_write and rebuild"
2073            );
2074        }
2075    }
2076
2077    #[test]
2078    fn on_remove_then_rebuild_match_and_pull_in_next_over_cap() {
2079        let (_d1, wt) = mk_store();
2080        let (_d2, rb) = mk_store();
2081        let total = MD_CAP + 3; // 503 files; removing one keeps md full at 500
2082        let mut all_rels = Vec::new();
2083        for i in 0..total {
2084            let rel = format!("sources/emails/2026/05/m-{i:04}.md");
2085            // `updated` strictly increasing across i by varying both minute and second
2086            let updated = format!("2026-05-10T00:{:02}:{:02}Z", i / 60, i % 60);
2087            write_doc(
2088                &wt,
2089                &rel,
2090                "email",
2091                Some(&format!("mail {i}")),
2092                Some(&updated),
2093                "",
2094            );
2095            write_doc(
2096                &rb,
2097                &rel,
2098                "email",
2099                Some(&format!("mail {i}")),
2100                Some(&updated),
2101                "",
2102            );
2103            all_rels.push(rel);
2104        }
2105        // Build write-through index, then remove the single newest file.
2106        Index::rebuild_all(&wt).unwrap();
2107        let newest = &all_rels[total - 1]; // highest i = newest updated
2108        fs::remove_file(wt.root.join(newest)).unwrap();
2109        Index::on_remove(&wt, Path::new(newest)).unwrap();
2110
2111        // Rebuild side: same end state (file physically absent).
2112        fs::remove_file(rb.root.join(newest)).unwrap();
2113        Index::rebuild_all(&rb).unwrap();
2114
2115        let a = snapshot_artifacts(&wt);
2116        let b = snapshot_artifacts(&rb);
2117        for (k, v) in &a {
2118            assert_eq!(v, &b[k], "after remove, artifact {k} drifted from rebuild");
2119        }
2120
2121        // The md must still hold exactly 500 entries (the 501st got pulled in)
2122        // and the removed file must be gone from both artifacts.
2123        let md = read(&wt, "sources/emails/index.md");
2124        assert_eq!(md.lines().filter(|l| l.starts_with("- [[")).count(), MD_CAP);
2125        // Removed (newest) file is gone from the bare-path md and the .md jsonl.
2126        assert!(
2127            !md.contains(&format!("[[{}]]", wiki_target(Path::new(newest)))),
2128            "removed file must not be listed in md"
2129        );
2130        // The file previously at rank 501 (excluded under the cap) is `all_rels[2]`
2131        // — `updated` increases with index, so newest-first rank 500 = index 2.
2132        // After dropping the newest it shifts into the visible 500.
2133        let pulled_in = &all_rels[2];
2134        assert!(
2135            md.contains(&format!("[[{}]]", wiki_target(Path::new(pulled_in)))),
2136            "the 501st-most-recent must be pulled into the browse view after a removal"
2137        );
2138        assert!(
2139            md.contains(&format!("This folder has {} files.", total - 1)),
2140            "footer count must decrement:\n{}",
2141            md.lines().rev().take(4).collect::<Vec<_>>().join("\n")
2142        );
2143        let jsonl = read(&wt, "sources/emails/index.jsonl");
2144        assert_eq!(
2145            jsonl.lines().count(),
2146            total - 1,
2147            "jsonl loses exactly the removed file"
2148        );
2149        assert!(
2150            !jsonl.contains(&path_to_unix(Path::new(newest))),
2151            "removed file must be gone from the jsonl too"
2152        );
2153    }
2154
2155    #[test]
2156    fn on_rename_cross_folder_matches_rebuild() {
2157        let (_d1, wt) = mk_store();
2158        let (_d2, rb) = mk_store();
2159        // Seed both stores identically.
2160        let seed: &[(&str, &str, &str, &str)] = &[
2161            (
2162                "records/contacts/a.md",
2163                "contact",
2164                "A",
2165                "2026-05-01T00:00:00Z",
2166            ),
2167            (
2168                "records/contacts/b.md",
2169                "contact",
2170                "B",
2171                "2026-05-02T00:00:00Z",
2172            ),
2173            (
2174                "records/companies/x.md",
2175                "company",
2176                "X",
2177                "2026-05-03T00:00:00Z",
2178            ),
2179        ];
2180        for (rel, t, s, u) in seed {
2181            write_doc(&wt, rel, t, Some(s), Some(u), "");
2182            write_doc(&rb, rel, t, Some(s), Some(u), "");
2183        }
2184        Index::rebuild_all(&wt).unwrap();
2185
2186        // Rename contacts/b.md -> companies/b.md (cross type-folder). The file's
2187        // `type` changes to match its new folder, as a real `dbmd rename` would.
2188        let old = "records/contacts/b.md";
2189        let new = "records/companies/b.md";
2190        fs::create_dir_all(wt.root.join("records/companies")).unwrap();
2191        fs::rename(wt.root.join(old), wt.root.join(new)).unwrap();
2192        // (type stays "contact" here; index copies frontmatter verbatim — the
2193        // test only asserts placement + parity with rebuild.)
2194        Index::on_rename(&wt, Path::new(old), Path::new(new)).unwrap();
2195
2196        // Rebuild side: same end state.
2197        fs::create_dir_all(rb.root.join("records/companies")).unwrap();
2198        fs::rename(rb.root.join(old), rb.root.join(new)).unwrap();
2199        Index::rebuild_all(&rb).unwrap();
2200
2201        let a = snapshot_artifacts(&wt);
2202        let b = snapshot_artifacts(&rb);
2203        assert_eq!(a.keys().collect::<Vec<_>>(), b.keys().collect::<Vec<_>>());
2204        for (k, v) in &a {
2205            assert_eq!(v, &b[k], "rename: artifact {k} drifted from rebuild");
2206        }
2207        // Concretely: b is gone from contacts, present in companies.
2208        let contacts = read(&wt, "records/contacts/index.md");
2209        assert!(!contacts.contains("records/contacts/b]]"));
2210        let companies = read(&wt, "records/companies/index.md");
2211        assert!(companies.contains("[[records/companies/b]]"));
2212    }
2213
2214    #[test]
2215    fn on_write_updates_existing_entry_in_place() {
2216        let (_d, store) = mk_store();
2217        write_doc(
2218            &store,
2219            "records/contacts/a.md",
2220            "contact",
2221            Some("Original"),
2222            Some("2026-05-01T00:00:00Z"),
2223            "",
2224        );
2225        Index::on_write(&store, Path::new("records/contacts/a.md")).unwrap();
2226        // Edit the same file: new summary + newer updated.
2227        write_doc(
2228            &store,
2229            "records/contacts/a.md",
2230            "contact",
2231            Some("Revised"),
2232            Some("2026-05-09T00:00:00Z"),
2233            "",
2234        );
2235        Index::on_write(&store, Path::new("records/contacts/a.md")).unwrap();
2236
2237        let jsonl = read(&store, "records/contacts/index.jsonl");
2238        assert_eq!(
2239            jsonl.lines().count(),
2240            1,
2241            "upsert must not duplicate the line"
2242        );
2243        assert!(jsonl.contains("Revised"), "jsonl must reflect the update");
2244        assert!(
2245            !jsonl.contains("Original"),
2246            "stale line must be gone (compacted)"
2247        );
2248        let md = read(&store, "records/contacts/index.md");
2249        assert!(md.contains("- [[records/contacts/a]] — Revised\n"));
2250        assert!(
2251            md.contains("updated: 2026-05-09T00:00:00Z\n"),
2252            "index updated must track the newer member"
2253        );
2254    }
2255
2256    // ── dry-run + cleanup ────────────────────────────────────────────────
2257
2258    #[test]
2259    fn dry_run_emits_separators_and_writes_nothing() {
2260        let (_d, store) = mk_store();
2261        write_doc(
2262            &store,
2263            "sources/emails/2026/05/a.md",
2264            "email",
2265            Some("Mail"),
2266            Some("2026-05-01T00:00:00Z"),
2267            "",
2268        );
2269        let out = Index::render_dry_run(&store, &IndexLevel::TypeFolder("sources/emails".into()))
2270            .unwrap();
2271        assert!(
2272            out.contains("--- sources/emails/index.md ---\n"),
2273            "md separator:\n{out}"
2274        );
2275        assert!(
2276            out.contains("--- sources/emails/index.jsonl ---\n"),
2277            "jsonl separator:\n{out}"
2278        );
2279        assert!(
2280            out.contains("- [[sources/emails/2026/05/a]] — Mail"),
2281            "md body present"
2282        );
2283        // Nothing was written to disk.
2284        assert!(
2285            !exists(&store, "sources/emails/index.md"),
2286            "dry-run must not write"
2287        );
2288        assert!(
2289            !exists(&store, "sources/emails/index.jsonl"),
2290            "dry-run must not write"
2291        );
2292    }
2293
2294    #[test]
2295    fn cleanup_removes_noncanonical_and_empty_indexes() {
2296        let (_d, store) = mk_store();
2297        write_doc(
2298            &store,
2299            "sources/emails/2026/05/a.md",
2300            "email",
2301            Some("Mail"),
2302            Some("2026-05-01T00:00:00Z"),
2303            "",
2304        );
2305        // A stray index inside a date-shard (non-canonical) ...
2306        fs::write(
2307            store.root.join("sources/emails/2026/05/index.md"),
2308            "stale\n",
2309        )
2310        .unwrap();
2311        fs::write(
2312            store.root.join("sources/emails/2026/05/index.jsonl"),
2313            "stale\n",
2314        )
2315        .unwrap();
2316        // ... and an index in an empty type-folder.
2317        fs::create_dir_all(store.root.join("records/empty")).unwrap();
2318        fs::write(store.root.join("records/empty/index.md"), "stale\n").unwrap();
2319
2320        Index::cleanup(&store).unwrap();
2321
2322        assert!(
2323            !exists(&store, "sources/emails/2026/05/index.md"),
2324            "shard index must be deleted"
2325        );
2326        assert!(
2327            !exists(&store, "sources/emails/2026/05/index.jsonl"),
2328            "shard jsonl must be deleted"
2329        );
2330        assert!(
2331            !exists(&store, "records/empty/index.md"),
2332            "empty-folder index must be deleted"
2333        );
2334        // The canonical type-folder file itself is untouched by cleanup.
2335        assert!(exists(&store, "sources/emails/2026/05/a.md"));
2336    }
2337
2338    #[test]
2339    fn rebuild_deletes_stale_indexes_for_emptied_folders() {
2340        let (_d, store) = mk_store();
2341        write_doc(
2342            &store,
2343            "records/contacts/a.md",
2344            "contact",
2345            Some("A"),
2346            Some("2026-05-01T00:00:00Z"),
2347            "",
2348        );
2349        Index::rebuild_all(&store).unwrap();
2350        assert!(exists(&store, "records/contacts/index.md"));
2351        assert!(exists(&store, "records/index.md"));
2352        assert!(exists(&store, "index.md"));
2353
2354        // Empty the folder entirely, then rebuild: all three levels vanish.
2355        fs::remove_file(store.root.join("records/contacts/a.md")).unwrap();
2356        Index::rebuild_all(&store).unwrap();
2357        assert!(
2358            !exists(&store, "records/contacts/index.md"),
2359            "emptied type-folder index gone"
2360        );
2361        assert!(
2362            !exists(&store, "records/index.md"),
2363            "now-empty layer index gone"
2364        );
2365        assert!(!exists(&store, "index.md"), "now-empty root index gone");
2366    }
2367
2368    // ── randomized parity (property-style) ───────────────────────────────
2369
2370    #[test]
2371    fn property_writethrough_equals_rebuild_under_mixed_ops() {
2372        // Deterministic pseudo-random op sequence (no rand crate): a small LCG.
2373        let (_d1, wt) = mk_store();
2374        let (_d2, rb) = mk_store();
2375        let mut seed: u64 = 0x9E3779B97F4A7C15;
2376        let mut next = || {
2377            seed = seed
2378                .wrapping_mul(6364136223846793005)
2379                .wrapping_add(1442695040888963407);
2380            (seed >> 33) as u32
2381        };
2382
2383        let folders = ["sources/emails", "records/contacts", "wiki/people"];
2384        let types = ["email", "contact", "wiki-page"];
2385        let mut live: Vec<String> = Vec::new(); // store-relative paths that exist
2386
2387        for step in 0..120u32 {
2388            let r = next();
2389            let op = r % 10;
2390            if op < 6 || live.is_empty() {
2391                // CREATE/UPDATE
2392                let fi = (next() as usize) % folders.len();
2393                let folder = folders[fi];
2394                let id = next() % 40;
2395                let rel = if folder == "sources/emails" {
2396                    let month = 5 + (id % 2); // shard across two months
2397                    format!("{folder}/2026/{month:02}/f-{id:02}.md")
2398                } else {
2399                    format!("{folder}/f-{id:02}.md")
2400                };
2401                // recency varies with step so order is meaningful + total
2402                let updated = format!(
2403                    "2026-05-{:02}T{:02}:{:02}:00Z",
2404                    1 + (step % 27),
2405                    step % 24,
2406                    id % 60
2407                );
2408                let extra = if id % 3 == 0 {
2409                    "tags:\n  - x\n  - y\n"
2410                } else {
2411                    ""
2412                };
2413                write_doc(
2414                    &wt,
2415                    &rel,
2416                    types[fi],
2417                    Some(&format!("sum {step}")),
2418                    Some(&updated),
2419                    extra,
2420                );
2421                write_doc(
2422                    &rb,
2423                    &rel,
2424                    types[fi],
2425                    Some(&format!("sum {step}")),
2426                    Some(&updated),
2427                    extra,
2428                );
2429                Index::on_write(&wt, Path::new(&rel)).unwrap();
2430                if !live.contains(&rel) {
2431                    live.push(rel);
2432                }
2433            } else if op < 8 {
2434                // REMOVE a live file
2435                let idx = (next() as usize) % live.len();
2436                let rel = live.remove(idx);
2437                fs::remove_file(wt.root.join(&rel)).unwrap();
2438                fs::remove_file(rb.root.join(&rel)).ok();
2439                Index::on_remove(&wt, Path::new(&rel)).unwrap();
2440            } else {
2441                // RENAME a live file within the same layer (new id, maybe new type-folder)
2442                let idx = (next() as usize) % live.len();
2443                let old = live[idx].clone();
2444                // pick a destination folder in the same layer-ish set
2445                let fi = (next() as usize) % folders.len();
2446                let folder = folders[fi];
2447                let id = 50 + (next() % 40);
2448                let new = if folder == "sources/emails" {
2449                    format!("{folder}/2026/05/f-{id:02}.md")
2450                } else {
2451                    format!("{folder}/f-{id:02}.md")
2452                };
2453                if new == old || live.contains(&new) {
2454                    continue;
2455                }
2456                fs::create_dir_all(wt.root.join(&new).parent().unwrap()).unwrap();
2457                fs::create_dir_all(rb.root.join(&new).parent().unwrap()).unwrap();
2458                fs::rename(wt.root.join(&old), wt.root.join(&new)).unwrap();
2459                fs::rename(rb.root.join(&old), rb.root.join(&new)).unwrap();
2460                Index::on_rename(&wt, Path::new(&old), Path::new(&new)).unwrap();
2461                live[idx] = new;
2462            }
2463        }
2464
2465        // Now rebuild the rb side from the shared end state and compare.
2466        Index::rebuild_all(&rb).unwrap();
2467        let a = snapshot_artifacts(&wt);
2468        let b = snapshot_artifacts(&rb);
2469        assert_eq!(
2470            a.keys().collect::<BTreeSet<_>>(),
2471            b.keys().collect::<BTreeSet<_>>(),
2472            "write-through and rebuild must produce the same set of artifacts"
2473        );
2474        for (k, v) in &a {
2475            assert_eq!(
2476                v, &b[k],
2477                "INVARIANT VIOLATED: artifact {k} differs after mixed ops\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
2478                b[k]
2479            );
2480        }
2481        assert!(
2482            !a.is_empty(),
2483            "the run must have produced at least one artifact"
2484        );
2485    }
2486}