use std::collections::{BTreeMap, BTreeSet};
use std::fs;
use std::io::Write as _;
use std::path::{Path, PathBuf};
use chrono::{DateTime, FixedOffset, SecondsFormat};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use crate::store::{Layer, Store};
/// Maximum number of entries listed in a type-folder `index.md`; the JSONL output is not capped.
const MD_CAP: usize = 500;
/// Placeholder summary used when a file's frontmatter has no string `summary`.
const MISSING_SUMMARY: &str = "(no summary)";
/// Heading text of the root-level `index.md`.
const ROOT_TITLE: &str = "Knowledge base index";
/// The level of the store hierarchy that an [`Index`] describes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexLevel {
/// The store root; rendered to `index.md` directly under the store root.
Root,
/// One layer directory (`sources`, `records` or `wiki`).
Layer(Layer),
/// A type folder, given as a store-relative path such as `records/contacts`.
TypeFolder(PathBuf),
}
/// One catalog entry describing a single Markdown file, built from its YAML frontmatter.
///
/// Each line of a type folder's `index.jsonl` is one serialized `IndexRecord`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndexRecord {
/// Path of the file relative to the store root.
pub path: PathBuf,
/// Frontmatter `type`; empty when the key is missing or not a string. Serialized as `type`.
#[serde(rename = "type")]
pub type_: String,
/// Frontmatter `summary`, or the `(no summary)` placeholder when missing.
pub summary: String,
/// Frontmatter `tags`: a single string or the string items of a list.
#[serde(default)]
pub tags: Vec<String>,
/// Frontmatter `links`: a single string or the string items of a list.
#[serde(default)]
pub links: Vec<String>,
/// Frontmatter `created`, kept only when it parses as an RFC 3339 timestamp.
pub created: Option<DateTime<FixedOffset>>,
/// Frontmatter `updated`, kept only when it parses as an RFC 3339 timestamp.
pub updated: Option<DateTime<FixedOffset>>,
/// Every other frontmatter key, flattened into the top level of the serialized object.
#[serde(flatten)]
pub fields: BTreeMap<String, Value>,
}
/// An index for one level of the store: the file records of a type folder, or per-type-folder
/// file counts for a layer or for the root.
#[derive(Debug, Clone, PartialEq)]
pub struct Index {
/// Which level of the store this index describes.
pub level: IndexLevel,
/// File records; filled for type-folder indexes and left empty by the layer/root builders.
pub records: Vec<IndexRecord>,
/// Number of indexed files per type folder (store-relative path); filled by the layer/root
/// builders and left empty for type-folder indexes.
pub child_counts: BTreeMap<PathBuf, usize>,
}
impl Index {
pub fn build_type_folder(store: &Store, type_folder: &Path) -> crate::Result<Index> {
let rel = normalize_rel(type_folder);
let abs = store.root.join(&rel);
let mut records = Vec::new();
for file_abs in walk_type_folder_files(&abs) {
let rel_path =
rel_to_store(&store.root, &file_abs).expect("walked file is under the store root");
records.push(record_from_file(&file_abs, rel_path)?);
}
sort_records(&mut records);
Ok(Index {
level: IndexLevel::TypeFolder(rel),
records,
child_counts: BTreeMap::new(),
})
}
pub fn build_layer(store: &Store, layer: Layer) -> crate::Result<Index> {
let mut child_counts = BTreeMap::new();
for tf in type_folders_in_layer(store, layer) {
let abs = store.root.join(&tf);
let n = walk_type_folder_files(&abs).len();
if n > 0 {
child_counts.insert(tf, n);
}
}
Ok(Index {
level: IndexLevel::Layer(layer),
records: Vec::new(),
child_counts,
})
}
/// Builds the root-level index: file counts for every non-empty type folder across all layers.
///
/// `records` is always empty at this level.
pub fn build_root(store: &Store) -> crate::Result<Index> {
    let mut child_counts = BTreeMap::new();
    for layer in Layer::all() {
        let populated = type_folders_in_layer(store, layer)
            .into_iter()
            .filter_map(|folder| {
                let files = walk_type_folder_files(&store.root.join(&folder)).len();
                if files > 0 {
                    Some((folder, files))
                } else {
                    None
                }
            });
        child_counts.extend(populated);
    }
    Ok(Index {
        level: IndexLevel::Root,
        records: Vec::new(),
        child_counts,
    })
}
/// Renders this index as the Markdown document for its level, using only in-memory data.
pub fn to_markdown(&self) -> String {
    match &self.level {
        IndexLevel::Root => self.render_root_md(),
        IndexLevel::Layer(layer) => self.render_layer_md(*layer),
        IndexLevel::TypeFolder(folder) => self.render_type_folder_md(folder),
    }
}
/// Serializes every record as one JSON object per line, each line terminated by `\n`.
///
/// Unlike the Markdown view, this output is not capped.
pub fn to_jsonl(&self) -> String {
    self.records.iter().fold(String::new(), |mut jsonl, rec| {
        jsonl.push_str(&serde_json::to_string(rec).expect("IndexRecord serializes"));
        jsonl.push('\n');
        jsonl
    })
}
/// Renders the type-folder `index.md`: frontmatter, a heading, and one bullet per record.
///
/// At most `MD_CAP` records are listed. When there are more, a "More" footer is appended
/// that names the total, the type of the first record, and the first path segment of
/// `folder` as the layer. `updated` in the frontmatter is the newest record timestamp and
/// is omitted when no record has one.
fn render_type_folder_md(&self, folder: &Path) -> String {
    let folder_disp = path_to_unix(folder);
    let total = self.records.len();

    let mut md = String::from("---\ntype: index\nscope: type-folder\n");
    md.push_str(&format!("folder: {folder_disp}\n"));
    if let Some(newest) = max_updated(self.records.iter().map(|r| r.updated.as_ref())) {
        md.push_str(&format!("updated: {}\n", fmt_ts(&newest)));
    }
    md.push_str(&format!("---\n\n# {folder_disp}\n\n"));

    for rec in self.records.iter().take(MD_CAP) {
        md.push_str(&format_md_entry(rec));
        md.push('\n');
    }

    if total > MD_CAP {
        let type_ = match self.records.first() {
            Some(first) => first.type_.as_str(),
            None => "",
        };
        let layer = folder
            .components()
            .next()
            .and_then(|c| c.as_os_str().to_str())
            .unwrap_or("");
        md.push('\n');
        md.push_str(&more_footer(total, type_, layer));
    }
    md
}
/// Renders a layer `index.md` from `child_counts` alone: frontmatter, a heading, and one
/// wiki-link bullet with a file count per type folder.
fn render_layer_md(&self, layer: Layer) -> String {
    let layer_dir = layer_dir_name(layer);
    let mut md = format!(
        "---\ntype: index\nscope: layer\nfolder: {layer_dir}\n---\n\n# {layer_dir}\n\n"
    );
    for (folder, count) in &self.child_counts {
        let target = path_to_unix(folder);
        let label = capitalize(folder_basename(folder));
        md.push_str(&format!("- [[{target}/index|{label}]] ({count})\n"));
    }
    md
}
fn render_root_md(&self) -> String {
let mut s = String::new();
s.push_str("---\n");
s.push_str("type: index\n");
s.push_str("scope: root\n");
s.push_str("---\n\n");
s.push_str(&format!("# {ROOT_TITLE}\n"));
for layer in Layer::all() {
let layer_dir = layer_dir_name(layer);
let prefix = format!("{layer_dir}/");
let children: Vec<(&PathBuf, &usize)> = self
.child_counts
.iter()
.filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
.collect();
if children.is_empty() {
continue;
}
let total: usize = children.iter().map(|(_, n)| **n).sum();
s.push('\n');
s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
for (tf, n) in children {
let tf_unix = path_to_unix(tf);
let display = capitalize(folder_basename(tf));
s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
}
}
s
}
}
impl Index {
pub fn on_write(store: &Store, file: &Path) -> crate::Result<()> {
let file_rel = normalize_rel(file);
let file_abs = store.root.join(&file_rel);
let folder = type_folder_of(&file_rel)
.ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
let record = record_from_file(&file_abs, file_rel.clone())?;
let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
records.retain(|r| r.path != record.path);
records.push(record);
sort_records(&mut records);
write_type_folder_artifacts(store, &folder, &records)?;
update_parents(store, &folder)?;
Ok(())
}
/// Updates the indexes after a file moved from `old` to `new` (both store-relative).
///
/// The old entry is dropped from the source type folder and a fresh record, read from the
/// file at `new`, is inserted into the target type folder (which may be the same folder).
/// Layer and root indexes of every affected folder are refreshed afterwards.
///
/// Everything that has to be *read* (both `index.jsonl` files and the renamed file's
/// frontmatter) is loaded before the first artifact is written, so a read or parse failure
/// cannot leave the old index already rewritten while the new one is still missing the entry.
///
/// # Errors
/// Fails when either path is not inside a layer/type-folder, when an existing
/// `index.jsonl` or the renamed file cannot be read or parsed, or when writing fails.
pub fn on_rename(store: &Store, old: &Path, new: &Path) -> crate::Result<()> {
    let old_rel = normalize_rel(old);
    let new_rel = normalize_rel(new);
    let old_folder = type_folder_of(&old_rel)
        .ok_or_else(|| bad_index(&old_rel, "source is not inside a layer/type-folder"))?;
    let new_folder = type_folder_of(&new_rel)
        .ok_or_else(|| bad_index(&new_rel, "target is not inside a layer/type-folder"))?;

    // --- Read phase: nothing on disk is modified until all inputs are loaded. ---
    let mut old_records =
        read_jsonl_records(&store.root.join(&old_folder).join("index.jsonl"))?;
    old_records.retain(|r| r.path != old_rel);
    let record = record_from_file(&store.root.join(&new_rel), new_rel)?;

    if old_folder == new_folder {
        // Same folder: one list both loses the old path and gains the new record.
        old_records.retain(|r| r.path != record.path);
        old_records.push(record);
        sort_records(&mut old_records);
        write_type_folder_artifacts(store, &old_folder, &old_records)?;
        return update_parents(store, &old_folder);
    }

    let mut new_records =
        read_jsonl_records(&store.root.join(&new_folder).join("index.jsonl"))?;
    new_records.retain(|r| r.path != record.path);
    new_records.push(record);
    sort_records(&mut old_records);
    sort_records(&mut new_records);

    // --- Write phase. ---
    write_type_folder_artifacts(store, &old_folder, &old_records)?;
    write_type_folder_artifacts(store, &new_folder, &new_records)?;
    update_parents(store, &old_folder)?;
    update_parents(store, &new_folder)?;
    Ok(())
}
/// Updates the indexes after `file` (store-relative) was deleted.
///
/// Drops the file's entry from its type folder's `index.jsonl`, rewrites that folder's
/// artifacts (removing them when no record is left) and refreshes the layer and root
/// indexes. The artifacts are rewritten even if no entry matched, so removing a file that
/// was never indexed is not an error.
///
/// # Errors
/// Fails when `file` is not inside a layer/type-folder, when the existing `index.jsonl`
/// cannot be read or parsed, or when writing an artifact fails.
pub fn on_remove(store: &Store, file: &Path) -> crate::Result<()> {
    let file_rel = normalize_rel(file);
    let folder = type_folder_of(&file_rel)
        .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
    let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
    records.retain(|r| r.path != file_rel);
    sort_records(&mut records);
    write_type_folder_artifacts(store, &folder, &records)?;
    update_parents(store, &folder)?;
    Ok(())
}
/// Regenerates every index artifact in the store from the files on disk.
///
/// Runs [`Index::cleanup`] first, then rewrites the artifacts of each non-empty type
/// folder, each layer `index.md`, and finally the root `index.md`. Layer and root index
/// files are removed when they would have no entries.
pub fn rebuild_all(store: &Store) -> crate::Result<()> {
    Index::cleanup(store)?;

    for layer in Layer::all() {
        for folder in type_folders_in_layer(store, layer) {
            let folder_idx = Index::build_type_folder(store, &folder)?;
            if !folder_idx.records.is_empty() {
                write_type_folder_artifacts(store, &folder, &folder_idx.records)?;
            }
        }

        let layer_idx = Index::build_layer(store, layer)?;
        let layer_md = store.root.join(layer_dir_name(layer)).join("index.md");
        if layer_idx.child_counts.is_empty() {
            remove_if_exists(&layer_md)?;
        } else {
            let rendered = render_layer_md_with_store(store, &layer_idx);
            write_atomic(&layer_md, rendered)?;
        }
    }

    let root_idx = Index::build_root(store)?;
    let root_md = store.root.join("index.md");
    if root_idx.child_counts.is_empty() {
        remove_if_exists(&root_md)?;
    } else {
        let rendered = render_root_md_with_store(store, &root_idx);
        write_atomic(&root_md, rendered)?;
    }
    Ok(())
}
/// Rebuilds the index for a single `level` from disk and writes (or removes) its files.
///
/// A type folder without records has its `index.md` and `index.jsonl` removed; a layer or
/// root without counted type folders has its `index.md` removed.
pub fn write_level(store: &Store, level: &IndexLevel) -> crate::Result<()> {
    match level {
        IndexLevel::TypeFolder(folder) => {
            let idx = Index::build_type_folder(store, folder)?;
            // The artifact writer removes both files itself when given no records.
            write_type_folder_artifacts(store, folder, &idx.records)
        }
        IndexLevel::Layer(layer) => {
            let idx = Index::build_layer(store, *layer)?;
            let target = store.root.join(layer_dir_name(*layer)).join("index.md");
            if idx.child_counts.is_empty() {
                remove_if_exists(&target)
            } else {
                write_atomic(&target, render_layer_md_with_store(store, &idx))
            }
        }
        IndexLevel::Root => {
            let idx = Index::build_root(store)?;
            let target = store.root.join("index.md");
            if idx.child_counts.is_empty() {
                remove_if_exists(&target)
            } else {
                write_atomic(&target, render_root_md_with_store(store, &idx))
            }
        }
    }
}
/// Renders what [`Index::write_level`] would produce for `level`, without writing anything.
///
/// Each artifact is preceded by a `--- <path> ---` header line. A type folder yields its
/// Markdown followed by its JSONL; a layer or the root yields only its Markdown.
pub fn render_dry_run(store: &Store, level: &IndexLevel) -> crate::Result<String> {
    let preview = match level {
        IndexLevel::TypeFolder(folder) => {
            let idx = Index::build_type_folder(store, folder)?;
            let md_path = path_to_unix(&folder.join("index.md"));
            let jsonl_path = path_to_unix(&folder.join("index.jsonl"));
            format!(
                "--- {md_path} ---\n{}--- {jsonl_path} ---\n{}",
                idx.to_markdown(),
                idx.to_jsonl()
            )
        }
        IndexLevel::Layer(layer) => {
            let idx = Index::build_layer(store, *layer)?;
            format!(
                "--- {}/index.md ---\n{}",
                layer_dir_name(*layer),
                render_layer_md_with_store(store, &idx)
            )
        }
        IndexLevel::Root => {
            let idx = Index::build_root(store)?;
            format!("--- index.md ---\n{}", render_root_md_with_store(store, &idx))
        }
    };
    Ok(preview)
}
/// Deletes index artifacts found inside the type folders of every existing layer.
///
/// For each type folder, every file named `index.md` or `index.jsonl` at any depth below
/// the folder is removed. Afterwards, if the folder holds no indexable files, its own
/// top-level `index.md` and `index.jsonl` are removed as well (a no-op when already gone).
/// Layer-level and root-level `index.md` files are not touched here.
pub fn cleanup(store: &Store) -> crate::Result<()> {
    for layer in Layer::all() {
        if !store.root.join(layer_dir_name(layer)).is_dir() {
            continue;
        }
        for folder in type_folders_in_layer(store, layer) {
            let folder_abs = store.root.join(&folder);

            let walker = walkdir::WalkDir::new(&folder_abs)
                .min_depth(1)
                .into_iter()
                .filter_map(|e| e.ok());
            for entry in walker {
                if is_index_artifact(entry.path()) {
                    remove_if_exists(entry.path())?;
                }
            }

            if walk_type_folder_files(&folder_abs).is_empty() {
                for artifact in ["index.md", "index.jsonl"] {
                    remove_if_exists(&folder_abs.join(artifact))?;
                }
            }
        }
    }
    Ok(())
}
}
/// Writes `index.md` and `index.jsonl` for `folder` (store-relative) from `records`.
///
/// With no records, both files are removed instead (missing files are fine).
fn write_type_folder_artifacts(
    store: &Store,
    folder: &Path,
    records: &[IndexRecord],
) -> crate::Result<()> {
    let dir = store.root.join(folder);
    let (md_path, jsonl_path) = (dir.join("index.md"), dir.join("index.jsonl"));

    if records.is_empty() {
        remove_if_exists(&md_path)?;
        return remove_if_exists(&jsonl_path);
    }

    let idx = Index {
        level: IndexLevel::TypeFolder(folder.to_path_buf()),
        records: records.to_vec(),
        child_counts: BTreeMap::new(),
    };
    write_atomic(&md_path, idx.to_markdown())?;
    write_atomic(&jsonl_path, idx.to_jsonl())
}
/// Refreshes the layer `index.md` that contains `folder`, then the root `index.md`.
///
/// Counts come from the type folders' `index.jsonl` files rather than from a walk of the
/// Markdown files. The layer step is skipped when the first segment of `folder` is not a
/// known layer directory. An index file is removed when it would have no entries.
fn update_parents(store: &Store, folder: &Path) -> crate::Result<()> {
    let first_segment = folder
        .components()
        .next()
        .and_then(|c| c.as_os_str().to_str());

    if let Some(layer) = first_segment.and_then(layer_from_dir_name) {
        let layer_idx = Index {
            level: IndexLevel::Layer(layer),
            records: Vec::new(),
            child_counts: child_counts_from_jsonl(store, &[layer])?,
        };
        let layer_md = store.root.join(layer_dir_name(layer)).join("index.md");
        if layer_idx.child_counts.is_empty() {
            remove_if_exists(&layer_md)?;
        } else {
            write_atomic(&layer_md, render_layer_md_with_store(store, &layer_idx))?;
        }
    }

    let root_idx = Index {
        level: IndexLevel::Root,
        records: Vec::new(),
        child_counts: child_counts_from_jsonl(store, &Layer::all())?,
    };
    let root_md = store.root.join("index.md");
    if root_idx.child_counts.is_empty() {
        remove_if_exists(&root_md)
    } else {
        write_atomic(&root_md, render_root_md_with_store(store, &root_idx))
    }
}
/// Renders a layer `index.md`, enriched with data read from each type folder's `index.jsonl`.
///
/// Compared to the in-memory renderer this adds an `updated` frontmatter line (the newest
/// `updated` among the first record of each folder) and a summary preview per bullet. The
/// preview is the first record's summary collapsed to one line and cut to 80 characters;
/// it is left out when empty or equal to the missing-summary placeholder. A folder whose
/// `index.jsonl` cannot be read or parsed is treated as having no records.
///
/// # Panics
/// Panics if `idx.level` is not `IndexLevel::Layer`.
fn render_layer_md_with_store(store: &Store, idx: &Index) -> String {
    let layer = match idx.level {
        IndexLevel::Layer(l) => l,
        _ => unreachable!("render_layer_md_with_store called on non-layer"),
    };
    let layer_dir = layer_dir_name(layer);

    let mut latest: Option<DateTime<FixedOffset>> = None;
    let mut listing = String::new();
    for (folder, count) in &idx.child_counts {
        let records =
            read_jsonl_records(&store.root.join(folder).join("index.jsonl")).unwrap_or_default();
        let newest = records.first();

        if let Some(ts) = newest.and_then(|r| r.updated) {
            // Keep the current maximum on ties so the first-seen timestamp wins.
            if !matches!(latest, Some(cur) if cur >= ts) {
                latest = Some(ts);
            }
        }

        let target = path_to_unix(folder);
        let label = capitalize(folder_basename(folder));
        listing.push_str(&format!("- [[{target}/index|{label}]] ({count})"));

        let preview = newest
            .map(|r| truncate(&r.summary, 80))
            .filter(|p| !p.is_empty() && p != MISSING_SUMMARY);
        if let Some(p) = preview {
            listing.push_str(&format!(" — {p}"));
        }
        listing.push('\n');
    }

    let mut md = format!("---\ntype: index\nscope: layer\nfolder: {layer_dir}\n");
    if let Some(ts) = latest {
        md.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
    }
    md.push_str(&format!("---\n\n# {layer_dir}\n\n"));
    md.push_str(&listing);
    md
}
/// Renders the root `index.md`, adding an `updated` frontmatter line derived from the
/// first record of each counted type folder's `index.jsonl`.
///
/// A folder whose `index.jsonl` cannot be read or parsed contributes no timestamp. The
/// body has one `##` section per layer with counted type folders, showing the layer total
/// followed by one wiki-link bullet per folder.
fn render_root_md_with_store(store: &Store, idx: &Index) -> String {
    let mut latest: Option<DateTime<FixedOffset>> = None;
    for folder in idx.child_counts.keys() {
        let records =
            read_jsonl_records(&store.root.join(folder).join("index.jsonl")).unwrap_or_default();
        if let Some(ts) = records.first().and_then(|r| r.updated) {
            // Keep the current maximum on ties so the first-seen timestamp wins.
            if !matches!(latest, Some(cur) if cur >= ts) {
                latest = Some(ts);
            }
        }
    }

    let mut md = String::from("---\ntype: index\nscope: root\n");
    if let Some(ts) = latest {
        md.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
    }
    md.push_str(&format!("---\n\n# {ROOT_TITLE}\n"));

    for layer in Layer::all() {
        let layer_dir = layer_dir_name(layer);
        let prefix = format!("{layer_dir}/");

        let mut bullets = String::new();
        let mut total: usize = 0;
        for (folder, count) in &idx.child_counts {
            let target = path_to_unix(folder);
            if !target.starts_with(&prefix) {
                continue;
            }
            total += *count;
            let label = capitalize(folder_basename(folder));
            bullets.push_str(&format!("- [[{target}/index|{label}]] ({count})\n"));
        }

        // Layers without counted type folders get no section.
        if bullets.is_empty() {
            continue;
        }
        md.push('\n');
        md.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
        md.push_str(&bullets);
    }
    md
}
/// Formats one record as a Markdown bullet: `- [[target]] — summary`, followed by
/// ` · #tag #tag…` when the record has tags. No trailing newline is added.
fn format_md_entry(rec: &IndexRecord) -> String {
    let mut entry = format!("- [[{}]] — {}", wiki_target(&rec.path), rec.summary);
    if rec.tags.is_empty() {
        return entry;
    }
    entry.push_str(" ·");
    for tag in &rec.tags {
        entry.push_str(" #");
        entry.push_str(tag);
    }
    entry
}
/// Builds the "More" footer shown when a type folder has more files than the Markdown cap.
///
/// `total` is the full file count; `type_` and `layer` are inserted into the suggested
/// `dbmd index query` command.
fn more_footer(total: usize, type_: &str, layer: &str) -> String {
    let mut footer = String::from("## More\n\n");
    footer.push_str(&format!(
        "This folder has {total} files. The {MD_CAP} most recent are listed above.\n"
    ));
    footer.push_str(&format!(
        "Use `dbmd index query --type {type_} --in {layer}` for the complete catalog.\n"
    ));
    footer
}
/// Sorts records newest `updated` first; records without `updated` go last, and ties
/// (including undated records among themselves) are ordered by ascending path.
fn sort_records(records: &mut [IndexRecord]) {
    use std::cmp::Ordering;

    records.sort_by(|a, b| {
        let by_recency = match (a.updated, b.updated) {
            // Reversed comparison: the later timestamp sorts first.
            (Some(a_ts), Some(b_ts)) => b_ts.cmp(&a_ts),
            (None, Some(_)) => Ordering::Greater,
            (Some(_), None) => Ordering::Less,
            (None, None) => Ordering::Equal,
        };
        by_recency.then_with(|| a.path.cmp(&b.path))
    });
}
impl IndexRecord {
/// Builds the record for the file at `abs`, storing `rel` as its path.
///
/// Crate-visible wrapper that delegates directly to `record_from_file`.
pub(crate) fn expected_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
record_from_file(abs, rel)
}
}
fn record_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
let meta = read_frontmatter(abs)?;
Ok(IndexRecord {
path: rel,
type_: meta.type_.unwrap_or_default(),
summary: meta.summary.unwrap_or_else(|| MISSING_SUMMARY.to_string()),
tags: meta.tags,
links: meta.links,
created: meta.created,
updated: meta.updated,
fields: meta.fields,
})
}
/// Frontmatter values extracted from one file, before defaults are applied.
struct FileMeta {
/// `type`, when present as a string.
type_: Option<String>,
/// `summary`, when present as a string.
summary: Option<String>,
/// `tags` as a list of strings (empty when absent).
tags: Vec<String>,
/// `links` as a list of strings (empty when absent).
links: Vec<String>,
/// `created`, when present as a string that parses as RFC 3339.
created: Option<DateTime<FixedOffset>>,
/// `updated`, when present as a string that parses as RFC 3339.
updated: Option<DateTime<FixedOffset>>,
/// All other string-keyed frontmatter entries (except `path`), converted to JSON values.
fields: BTreeMap<String, Value>,
}
fn read_frontmatter(abs: &Path) -> crate::Result<FileMeta> {
let text = fs::read_to_string(abs)?;
let yaml = extract_frontmatter_block(&text).unwrap_or_default();
let map: serde_yml::Mapping = if yaml.trim().is_empty() {
serde_yml::Mapping::new()
} else {
serde_yml::from_str(&yaml).map_err(|e| {
crate::Error::Store(crate::store::StoreError::BadTypeIndex {
path: abs.to_path_buf(),
message: format!("frontmatter YAML: {e}"),
})
})?
};
let mut type_ = None;
let mut summary = None;
let mut tags = Vec::new();
let mut links = Vec::new();
let mut created = None;
let mut updated = None;
let mut fields = BTreeMap::new();
for (k, v) in map {
let key = match k.as_str() {
Some(s) => s.to_string(),
None => continue,
};
match key.as_str() {
"type" => type_ = v.as_str().map(str::to_string),
"summary" => summary = v.as_str().map(str::to_string),
"tags" => tags = yaml_string_list(&v),
"links" => links = yaml_string_list(&v),
"created" => created = v.as_str().and_then(parse_ts),
"updated" => updated = v.as_str().and_then(parse_ts),
"path" => {}
_ => {
if let Ok(jv) = serde_json::to_value(&v) {
fields.insert(key, jv);
}
}
}
}
Ok(FileMeta {
type_,
summary,
tags,
links,
created,
updated,
fields,
})
}
/// Returns the text between the opening and closing `---` fences of a frontmatter block.
///
/// A leading UTF-8 BOM is ignored and fence lines may carry trailing whitespace. Each
/// returned line ends with `\n`. Returns `None` when the first line is not a fence or
/// when no closing fence is found.
fn extract_frontmatter_block(text: &str) -> Option<String> {
    let is_fence = |line: &str| line.trim_end() == "---";

    let body = text.strip_prefix('\u{feff}').unwrap_or(text);
    let mut lines = body.lines();
    if !is_fence(lines.next()?) {
        return None;
    }

    let mut block = String::new();
    loop {
        // Running out of lines before the closing fence means there is no block.
        let line = lines.next()?;
        if is_fence(line) {
            return Some(block);
        }
        block.push_str(line);
        block.push('\n');
    }
}
/// Converts a YAML value into a list of strings.
///
/// A string becomes a one-element list, a sequence keeps only its string items, and any
/// other value yields an empty list.
fn yaml_string_list(v: &serde_yml::Value) -> Vec<String> {
    match v {
        serde_yml::Value::Sequence(items) => items
            .iter()
            .filter_map(|item| item.as_str())
            .map(String::from)
            .collect(),
        serde_yml::Value::String(single) => vec![single.clone()],
        _ => Vec::new(),
    }
}
/// Parses `s` (surrounding whitespace ignored) as an RFC 3339 timestamp; `None` on failure.
fn parse_ts(s: &str) -> Option<DateTime<FixedOffset>> {
DateTime::parse_from_rfc3339(s.trim()).ok()
}
/// Formats `ts` as RFC 3339 with automatic sub-second precision and the `use_z` flag set,
/// e.g. `2026-05-27T10:00:00Z` for a UTC timestamp.
fn fmt_ts(ts: &DateTime<FixedOffset>) -> String {
ts.to_rfc3339_opts(SecondsFormat::AutoSi, true)
}
/// Returns the latest timestamp among the `Some` items of `it`, or `None` if there is none.
///
/// When several items compare equal, the first one encountered is returned.
fn max_updated<'a>(
    it: impl Iterator<Item = Option<&'a DateTime<FixedOffset>>>,
) -> Option<DateTime<FixedOffset>> {
    it.flatten()
        .fold(None, |best: Option<DateTime<FixedOffset>>, ts| match best {
            Some(cur) if cur >= *ts => Some(cur),
            _ => Some(*ts),
        })
}
fn read_jsonl_records(jsonl: &Path) -> crate::Result<Vec<IndexRecord>> {
let text = match fs::read_to_string(jsonl) {
Ok(t) => t,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
Err(e) => return Err(e.into()),
};
let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
for (i, line) in text.lines().enumerate() {
if line.trim().is_empty() {
continue;
}
let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
crate::Error::Store(crate::store::StoreError::BadTypeIndex {
path: jsonl.to_path_buf(),
message: format!("line {}: {e}", i + 1),
})
})?;
by_path.insert(rec.path.clone(), rec);
}
let mut records: Vec<IndexRecord> = by_path.into_values().collect();
sort_records(&mut records);
Ok(records)
}
fn jsonl_record_count(jsonl: &Path) -> crate::Result<usize> {
let text = match fs::read_to_string(jsonl) {
Ok(t) => t,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(0),
Err(e) => return Err(e.into()),
};
let mut paths: BTreeSet<PathBuf> = BTreeSet::new();
for (i, line) in text.lines().enumerate() {
if line.trim().is_empty() {
continue;
}
let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
crate::Error::Store(crate::store::StoreError::BadTypeIndex {
path: jsonl.to_path_buf(),
message: format!("line {}: {e}", i + 1),
})
})?;
paths.insert(rec.path);
}
Ok(paths.len())
}
/// Collects per-type-folder record counts for `layers` from the folders' `index.jsonl` files.
///
/// Type folders whose count is zero (including those without an `index.jsonl`) are omitted.
fn child_counts_from_jsonl(
    store: &Store,
    layers: &[Layer],
) -> crate::Result<BTreeMap<PathBuf, usize>> {
    let mut counts = BTreeMap::new();
    for &layer in layers {
        for folder in type_folders_in_layer(store, layer) {
            match jsonl_record_count(&store.root.join(&folder).join("index.jsonl"))? {
                0 => {}
                n => {
                    counts.insert(folder, n);
                }
            }
        }
    }
    Ok(counts)
}
/// Lists the indexable Markdown files below `folder_abs`, recursively.
///
/// Hidden entries (names starting with `.`) are not descended into, unreadable entries
/// are skipped, and only regular files with the `md` extension other than `index.md` are
/// returned. A path that is not a directory yields an empty list.
fn walk_type_folder_files(folder_abs: &Path) -> Vec<PathBuf> {
    if !folder_abs.is_dir() {
        return Vec::new();
    }
    walkdir::WalkDir::new(folder_abs)
        .into_iter()
        .filter_entry(|e| !is_hidden(e.file_name()))
        .filter_map(|e| e.ok())
        .filter(|entry| entry.file_type().is_file())
        .map(|entry| entry.path().to_path_buf())
        .filter(|p| p.extension().and_then(|e| e.to_str()) == Some("md"))
        .filter(|p| p.file_name().and_then(|n| n.to_str()) != Some("index.md"))
        .collect()
}
/// Lists the type folders of `layer` as sorted store-relative paths (`<layer>/<name>`).
///
/// Only directories are returned; hidden names, names that are not valid UTF-8, and the
/// folder named `log` are skipped. An unreadable or missing layer directory yields an
/// empty list.
fn type_folders_in_layer(store: &Store, layer: Layer) -> Vec<PathBuf> {
    let layer_name = layer_dir_name(layer);
    let entries = match fs::read_dir(store.root.join(layer_name)) {
        Ok(entries) => entries,
        Err(_) => return Vec::new(),
    };

    let mut folders: Vec<PathBuf> = entries
        .flatten()
        .filter(|entry| entry.path().is_dir())
        .filter_map(|entry| {
            let os_name = entry.file_name();
            let name = os_name.to_str()?;
            if is_hidden(&os_name) || name == "log" {
                return None;
            }
            Some(PathBuf::from(layer_name).join(name))
        })
        .collect();
    folders.sort();
    folders
}
/// Returns the type folder (`<layer>/<type>`) that contains the store-relative file `file_rel`.
///
/// Returns `None` when the first segment is not a known layer directory, when a segment
/// is not valid UTF-8, or when the path has fewer than three components. A path such as
/// `records/stray.md` names a file directly in the layer directory, not a file inside a
/// type folder, so it must not be reported as the type folder `records/stray.md`.
fn type_folder_of(file_rel: &Path) -> Option<PathBuf> {
    let mut comps = file_rel.components();
    let layer = comps.next()?.as_os_str().to_str()?;
    layer_from_dir_name(layer)?;
    let type_seg = comps.next()?.as_os_str().to_str()?;
    // Require at least one more component: the file (or shard directory) inside the folder.
    comps.next()?;
    Some(PathBuf::from(layer).join(type_seg))
}
/// Returns `abs` relative to `root`, or `None` when `abs` does not start with `root`.
fn rel_to_store(root: &Path, abs: &Path) -> Option<PathBuf> {
    match abs.strip_prefix(root) {
        Ok(rest) => Some(rest.to_path_buf()),
        Err(_) => None,
    }
}
/// Normalizes a store-relative path: joins its components with `/` and drops one leading `./`.
fn normalize_rel(p: &Path) -> PathBuf {
    let unix = path_to_unix(p);
    match unix.strip_prefix("./") {
        Some(rest) => PathBuf::from(rest),
        None => PathBuf::from(unix),
    }
}
/// Returns `true` when the final component of `p` is exactly `index.md` or `index.jsonl`.
fn is_index_artifact(p: &Path) -> bool {
    let name = p.file_name().and_then(|n| n.to_str());
    name == Some("index.md") || name == Some("index.jsonl")
}
/// Returns `true` when `name` is valid UTF-8 and starts with a dot.
fn is_hidden(name: &std::ffi::OsStr) -> bool {
    match name.to_str() {
        Some(text) => text.starts_with('.'),
        None => false,
    }
}
/// Returns the directory name of `layer` under the store root.
fn layer_dir_name(layer: Layer) -> &'static str {
match layer {
Layer::Sources => "sources",
Layer::Records => "records",
Layer::Wiki => "wiki",
}
}
/// Maps a directory name back to its layer; the inverse of `layer_dir_name`.
///
/// Returns `None` for any name other than `sources`, `records` or `wiki` (the match is
/// case-sensitive).
fn layer_from_dir_name(name: &str) -> Option<Layer> {
match name {
"sources" => Some(Layer::Sources),
"records" => Some(Layer::Records),
"wiki" => Some(Layer::Wiki),
_ => None,
}
}
/// Returns the final component of `p` as text, or `""` when there is none or it is not UTF-8.
fn folder_basename(p: &Path) -> &str {
    match p.file_name() {
        Some(name) => name.to_str().unwrap_or(""),
        None => "",
    }
}
/// Builds the wiki-link target for `p`: its `/`-joined form without a trailing `.md`.
fn wiki_target(p: &Path) -> String {
    let mut target = path_to_unix(p);
    if target.ends_with(".md") {
        target.truncate(target.len() - ".md".len());
    }
    target
}
/// Joins the UTF-8 components of `p` with `/`; components that are not UTF-8 are dropped.
fn path_to_unix(p: &Path) -> String {
    let mut unix = String::new();
    let mut first = true;
    for part in p.components().filter_map(|c| c.as_os_str().to_str()) {
        // Track position with a flag rather than `unix.is_empty()` so that the separator
        // placement does not depend on the text of earlier components.
        if !first {
            unix.push('/');
        }
        unix.push_str(part);
        first = false;
    }
    unix
}
/// Returns `s` with its first character upper-cased (Unicode-aware); the rest is unchanged.
fn capitalize(s: &str) -> String {
    match s.chars().next() {
        None => String::new(),
        Some(first) => {
            let mut out: String = first.to_uppercase().collect();
            out.push_str(&s[first.len_utf8()..]);
            out
        }
    }
}
/// Collapses all whitespace runs in `s` to single spaces (trimming both ends) and keeps at
/// most the first `max` characters of the result. No ellipsis is appended.
fn truncate(s: &str, max: usize) -> String {
    let mut collapsed = String::new();
    for word in s.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    // If a character exists at index `max`, cut right before it (on a char boundary).
    if let Some((cut, _)) = collapsed.char_indices().nth(max) {
        collapsed.truncate(cut);
    }
    collapsed
}
/// Writes `contents` to `path` by filling a temporary file in the same directory and then
/// renaming it over `path`. The parent directory is created when missing.
fn write_atomic(path: &Path, contents: String) -> crate::Result<()> {
    let parent = path.parent();
    if let Some(dir) = parent {
        fs::create_dir_all(dir)?;
    }
    let mut tmp = tempfile_in(parent.unwrap_or_else(|| Path::new(".")))?;
    tmp.write_all(contents.as_bytes())?;
    tmp.flush()?;
    tmp.persist(path)?;
    Ok(())
}
/// Removes the file at `path`; a file that does not exist is not an error.
fn remove_if_exists(path: &Path) -> crate::Result<()> {
    if let Err(e) = fs::remove_file(path) {
        if e.kind() != std::io::ErrorKind::NotFound {
            return Err(e.into());
        }
    }
    Ok(())
}
/// Builds a `StoreError::BadTypeIndex` error for `path` carrying `msg` as its message.
fn bad_index(path: &Path, msg: &str) -> crate::Error {
crate::Error::Store(crate::store::StoreError::BadTypeIndex {
path: path.to_path_buf(),
message: msg.to_string(),
})
}
/// A temporary file that is deleted on drop unless it has been renamed into place.
struct AtomicTemp {
/// The open handle; taken (and thereby closed) when the file is persisted.
file: Option<fs::File>,
/// Location of the temporary file on disk.
path: PathBuf,
/// Set once the rename succeeded, which disables the removal in `Drop`.
persisted: bool,
}
impl AtomicTemp {
    /// Writes all of `bytes` to the temporary file.
    ///
    /// # Panics
    /// Panics if the file handle has already been taken.
    fn write_all(&mut self, bytes: &[u8]) -> std::io::Result<()> {
        let file = self.file.as_mut().expect("temp file open");
        file.write_all(bytes)
    }

    /// Flushes the temporary file's writer.
    ///
    /// # Panics
    /// Panics if the file handle has already been taken.
    fn flush(&mut self) -> std::io::Result<()> {
        let file = self.file.as_mut().expect("temp file open");
        file.flush()
    }

    /// Syncs (best effort) and closes the file, then renames it to `dest`.
    ///
    /// When the rename fails the error is returned and the temporary file is removed by
    /// `Drop`, because `persisted` is still `false`.
    fn persist(mut self, dest: &Path) -> std::io::Result<()> {
        if let Some(handle) = self.file.take() {
            // A failed sync is deliberately ignored; the handle closes at the end of this block.
            let _ = handle.sync_all();
        }
        fs::rename(&self.path, dest)?;
        self.persisted = true;
        Ok(())
    }
}
impl Drop for AtomicTemp {
    /// Removes the temporary file unless it was persisted; removal errors are ignored.
    fn drop(&mut self) {
        if self.persisted {
            return;
        }
        let _ = fs::remove_file(&self.path);
    }
}
/// Creates a new, uniquely named hidden temporary file in `dir`.
///
/// The name combines the process id, the current time in nanoseconds (0 when the clock
/// is before the Unix epoch) and a process-wide counter. Creation uses `create_new`, so
/// an existing file with the same name makes this fail instead of being overwritten.
fn tempfile_in(dir: &Path) -> std::io::Result<AtomicTemp> {
    use std::time::{SystemTime, UNIX_EPOCH};

    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let path = dir.join(format!(
        ".dbmd-index-{}-{nanos}-{}.tmp",
        std::process::id(),
        next_temp_counter()
    ));

    let mut options = fs::OpenOptions::new();
    options.write(true).create_new(true);
    let file = options.open(&path)?;

    Ok(AtomicTemp {
        file: Some(file),
        path,
        persisted: false,
    })
}
/// Returns the next value of a process-wide counter, starting at 0 and increasing by one
/// per call.
fn next_temp_counter() -> u64 {
    use std::sync::atomic::{AtomicU64, Ordering};

    static NEXT: AtomicU64 = AtomicU64::new(0);
    NEXT.fetch_add(1, Ordering::Relaxed)
}
#[cfg(test)]
mod tests {
use super::*;
use std::collections::BTreeSet;
use std::fs;
use tempfile::TempDir;
fn mk_store() -> (TempDir, Store) {
let dir = TempDir::new().unwrap();
fs::write(dir.path().join("DB.md"), "# test store\n").unwrap();
let store = Store {
root: dir.path().to_path_buf(),
config: crate::parser::Config::default(),
};
(dir, store)
}
fn write_raw(store: &Store, rel: &str, fm: &str, body: &str) {
let abs = store.root.join(rel);
fs::create_dir_all(abs.parent().unwrap()).unwrap();
fs::write(&abs, format!("---\n{fm}\n---\n{body}")).unwrap();
}
fn write_doc(
store: &Store,
rel: &str,
type_: &str,
summary: Option<&str>,
updated: Option<&str>,
extra_yaml: &str,
) {
let mut fm = format!("type: {type_}\n");
if let Some(s) = summary {
fm.push_str(&format!("summary: {s}\n"));
}
if let Some(u) = updated {
fm.push_str(&format!("updated: {u}\n"));
}
fm.push_str(extra_yaml);
write_raw(store, rel, fm.trim_end(), "\nbody text\n");
}
fn read(store: &Store, rel: &str) -> String {
fs::read_to_string(store.root.join(rel)).unwrap()
}
fn exists(store: &Store, rel: &str) -> bool {
store.root.join(rel).exists()
}
fn snapshot_artifacts(store: &Store) -> BTreeMap<String, String> {
let mut out = BTreeMap::new();
for entry in walkdir::WalkDir::new(&store.root)
.into_iter()
.filter_map(|e| e.ok())
{
let p = entry.path();
if is_index_artifact(p) {
let rel = path_to_unix(&rel_to_store(&store.root, p).unwrap());
out.insert(rel, fs::read_to_string(p).unwrap());
}
}
out
}
#[test]
fn type_folder_aggregates_across_shards_in_recency_order() {
let (_d, store) = mk_store();
write_doc(
&store,
"sources/emails/2026/05/b-old.md",
"email",
Some("Older mail"),
Some("2026-05-01T09:00:00Z"),
"",
);
write_doc(
&store,
"sources/emails/2026/06/c-new.md",
"email",
Some("Newest mail"),
Some("2026-06-15T12:00:00Z"),
"",
);
write_doc(
&store,
"sources/emails/2026/05/a-mid.md",
"email",
Some("Middle mail"),
Some("2026-05-20T08:00:00Z"),
"",
);
let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
let paths: Vec<String> = idx.records.iter().map(|r| path_to_unix(&r.path)).collect();
assert_eq!(
paths,
vec![
"sources/emails/2026/06/c-new.md",
"sources/emails/2026/05/a-mid.md",
"sources/emails/2026/05/b-old.md",
],
"records must aggregate across shards, newest `updated` first"
);
}
#[test]
fn type_folder_md_format_entries_tags_and_derived_updated() {
let (_d, store) = mk_store();
write_doc(
&store,
"records/contacts/sarah-chen.md",
"contact",
Some("Renewal champion at Acme"),
Some("2026-05-27T10:00:00Z"),
"tags:\n - renewal\n - acme\n",
);
write_doc(
&store,
"records/contacts/no-tags.md",
"contact",
Some("Plain contact"),
Some("2026-05-26T10:00:00Z"),
"",
);
let idx = Index::build_type_folder(&store, Path::new("records/contacts")).unwrap();
let md = idx.to_markdown();
assert!(md.starts_with(
"---\ntype: index\nscope: type-folder\nfolder: records/contacts\nupdated: 2026-05-27T10:00:00Z\n---\n\n# records/contacts\n"
), "frontmatter/heading wrong:\n{md}");
assert!(
md.contains(
"- [[records/contacts/sarah-chen]] — Renewal champion at Acme · #renewal #acme\n"
),
"tagged entry wrong:\n{md}"
);
assert!(
md.contains("- [[records/contacts/no-tags]] — Plain contact\n"),
"untagged entry wrong:\n{md}"
);
assert!(
!md.contains("Plain contact ·"),
"untagged entry must not emit a tag separator"
);
assert!(!md.contains("## More"), "no footer expected under the cap");
}
#[test]
fn missing_summary_becomes_placeholder_not_invented() {
let (_d, store) = mk_store();
write_doc(
&store,
"records/notes/x.md",
"note",
None,
Some("2026-05-27T10:00:00Z"),
"",
);
let idx = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
assert_eq!(idx.records[0].summary, MISSING_SUMMARY);
let md = idx.to_markdown();
assert!(
md.contains("- [[records/notes/x]] — (no summary)\n"),
"missing summary must render the placeholder, not invent text:\n{md}"
);
}
#[test]
fn jsonl_is_complete_structured_and_round_trips() {
let (_d, store) = mk_store();
write_doc(
&store,
"records/expenses/2026/05/e1.md",
"expense",
Some("Lunch with vendor"),
Some("2026-05-10T10:00:00Z"),
"created: 2026-05-10T09:00:00Z\nstatus: paid\namount: 42\ntags:\n - food\nlinks:\n - wiki/themes/spend\n",
);
write_doc(
&store,
"records/expenses/2026/06/e2.md",
"expense",
Some("Cloud bill"),
Some("2026-06-01T10:00:00Z"),
"amount: 100\n",
);
let idx = Index::build_type_folder(&store, Path::new("records/expenses")).unwrap();
let jsonl = idx.to_jsonl();
let lines: Vec<&str> = jsonl.lines().collect();
assert_eq!(lines.len(), 2, "one JSON object per file, uncapped");
let r0: IndexRecord = serde_json::from_str(lines[0]).unwrap();
assert_eq!(path_to_unix(&r0.path), "records/expenses/2026/06/e2.md");
assert_eq!(
r0, idx.records[0],
"jsonl line must round-trip to the record"
);
let r1: IndexRecord = serde_json::from_str(lines[1]).unwrap();
assert_eq!(r1.type_, "expense");
assert_eq!(r1.summary, "Lunch with vendor");
assert_eq!(r1.tags, vec!["food".to_string()]);
assert_eq!(r1.links, vec!["wiki/themes/spend".to_string()]);
assert_eq!(
r1.created,
Some(DateTime::parse_from_rfc3339("2026-05-10T09:00:00Z").unwrap())
);
assert_eq!(r1.fields.get("status"), Some(&Value::from("paid")));
assert_eq!(r1.fields.get("amount"), Some(&Value::from(42)));
for reserved in [
"path", "type", "summary", "tags", "links", "created", "updated",
] {
assert!(
!r1.fields.contains_key(reserved),
"reserved key {reserved} must not appear in fields"
);
}
assert!(
lines[1].starts_with(
r#"{"path":"records/expenses/2026/05/e1.md","type":"expense","summary":"Lunch with vendor","tags":["food"],"links":["wiki/themes/spend"],"created":"2026-05-10T09:00:00Z","updated":"2026-05-10T10:00:00Z","#
),
"jsonl key order not stable:\n{}",
lines[1]
);
assert!(
lines[1].ends_with(r#""amount":42,"status":"paid"}"#),
"extras must be sorted:\n{}",
lines[1]
);
}
/// A type folder holding more files than `MD_CAP` keeps every record (and every
/// jsonl line) but lists only `MD_CAP` entries in markdown, followed by a
/// "More" footer that reports the total and the full-catalog query command.
#[test]
fn over_cap_md_shows_500_plus_footer_jsonl_holds_all() {
    let (_d, store) = mk_store();
    let total = MD_CAP + 7;

    // Seed `total` emails in a single shard; day and second cycle with the index.
    (0..total).for_each(|i| {
        let rel = format!("sources/emails/2026/05/m-{i:04}.md");
        let summary = format!("mail {i}");
        let stamp = format!("2026-05-{:02}T00:00:{:02}Z", 1 + (i % 27), i % 60);
        write_doc(&store, &rel, "email", Some(&summary), Some(&stamp), "");
    });

    let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
    assert_eq!(idx.records.len(), total, "jsonl/records keep every file");

    // Markdown: capped entry list plus the footer.
    let md = idx.to_markdown();
    let listed = md.lines().filter(|line| line.starts_with("- [[")).count();
    assert_eq!(listed, MD_CAP, "md browse view is capped at 500");
    assert!(
        md.contains("## More\n\n"),
        "over-cap md needs a More footer"
    );
    let footer_count =
        format!("This folder has {total} files. The 500 most recent are listed above.\n");
    assert!(md.contains(&footer_count), "footer count wrong:\n{md}");
    let footer_hint =
        "Use `dbmd index query --type email --in sources` for the complete catalog.\n";
    assert!(
        md.contains(footer_hint),
        "footer must infer type=email layer=sources:\n{md}"
    );

    // Jsonl: one line per file, no cap.
    let jsonl = idx.to_jsonl();
    assert_eq!(jsonl.lines().count(), total, "jsonl is uncapped");
}
/// `sort_records` orders newest `updated` first, breaks equal timestamps by
/// ascending path, and places records without `updated` at the end.
#[test]
fn sort_breaks_ties_by_path_and_puts_undated_last() {
    let tied = Some("2026-05-01T00:00:00Z");
    let newest = Some("2026-06-01T00:00:00Z");
    let mut recs = vec![
        rec("z/a.md", tied),
        rec("a/b.md", tied),
        rec("m/c.md", None),
        rec("b/d.md", newest),
    ];

    sort_records(&mut recs);

    let mut order: Vec<String> = Vec::with_capacity(recs.len());
    for r in &recs {
        order.push(path_to_unix(&r.path));
    }
    assert_eq!(order, vec!["b/d.md", "a/b.md", "z/a.md", "m/c.md"]);
}
/// Builds a minimal `IndexRecord` fixture at `path` with placeholder type and
/// summary, no tags, links, `created`, or extra fields.
///
/// `updated`, when given, must be an RFC 3339 timestamp; parsing panics otherwise.
fn rec(path: &str, updated: Option<&str>) -> IndexRecord {
    let updated = match updated {
        Some(stamp) => Some(DateTime::parse_from_rfc3339(stamp).unwrap()),
        None => None,
    };
    IndexRecord {
        path: PathBuf::from(path),
        type_: String::from("t"),
        summary: String::from("s"),
        tags: Vec::new(),
        links: Vec::new(),
        created: None,
        updated,
        fields: BTreeMap::new(),
    }
}
/// The layer-level `records/index.md` lists its type folders alphabetically,
/// each with a file count and a preview summary, and carries frontmatter whose
/// `updated` equals the newest timestamp among the seeded documents.
#[test]
fn layer_index_lists_type_folders_with_counts_and_preview() {
    let (_d, store) = mk_store();

    let seed = [
        ("records/contacts/a.md", "contact", "Contact A older", "2026-05-01T00:00:00Z"),
        ("records/contacts/b.md", "contact", "Contact B newest", "2026-05-09T00:00:00Z"),
        ("records/companies/x.md", "company", "Acme Inc", "2026-05-05T00:00:00Z"),
    ];
    for (rel, kind, summary, updated) in seed {
        write_doc(&store, rel, kind, Some(summary), Some(updated), "");
    }

    // Both type folders are written before the layer that rolls them up.
    let levels = [
        IndexLevel::TypeFolder("records/contacts".into()),
        IndexLevel::TypeFolder("records/companies".into()),
        IndexLevel::Layer(Layer::Records),
    ];
    for level in &levels {
        Index::write_level(&store, level).unwrap();
    }

    let md = read(&store, "records/index.md");
    assert!(
        md.starts_with("---\ntype: index\nscope: layer\nfolder: records\n"),
        "layer fm:\n{md}"
    );

    let companies_at = md.find("companies/index").unwrap();
    let contacts_at = md.find("contacts/index").unwrap();
    assert!(
        companies_at < contacts_at,
        "type folders must be alphabetical"
    );

    let expectations = [
        (
            "- [[records/contacts/index|Contacts]] (2) — Contact B newest\n",
            "contacts entry",
        ),
        (
            "- [[records/companies/index|Companies]] (1) — Acme Inc\n",
            "companies entry",
        ),
        (
            "updated: 2026-05-09T00:00:00Z\n",
            "layer updated must be max child",
        ),
    ];
    for (needle, label) in expectations {
        assert!(md.contains(needle), "{label}:\n{md}");
    }
}
/// After a full rebuild the root `index.md` has one section per non-empty
/// layer (Sources before Records, no Wiki section), each heading carrying the
/// layer total, and one count-only entry per type folder with no preview text.
#[test]
fn root_index_groups_layers_with_totals_and_per_type_counts() {
    let (_d, store) = mk_store();

    let seed = [
        ("sources/emails/2026/05/a.md", "email", "Mail", "2026-05-01T00:00:00Z"),
        ("sources/docs/d.md", "doc", "Doc", "2026-05-02T00:00:00Z"),
        ("records/contacts/c.md", "contact", "C", "2026-05-03T00:00:00Z"),
    ];
    for (rel, kind, summary, updated) in seed {
        write_doc(&store, rel, kind, Some(summary), Some(updated), "");
    }
    Index::rebuild_all(&store).unwrap();

    let md = read(&store, "index.md");
    assert!(
        md.starts_with("---\ntype: index\nscope: root\n"),
        "root fm:\n{md}"
    );
    assert!(md.contains("# Knowledge base index\n"), "root title:\n{md}");

    // Layer headings: totals and relative order.
    let sources_h = md
        .find("## Sources (2)")
        .expect("sources heading w/ total 2");
    let records_h = md
        .find("## Records (1)")
        .expect("records heading w/ total 1");
    assert!(sources_h < records_h, "Sources must precede Records");
    assert!(!md.contains("## Wiki"), "empty layer gets no section");

    // Per-type-folder entries carry a count only.
    let entries = [
        ("- [[sources/docs/index|Docs]] (1)\n", "root docs entry"),
        ("- [[sources/emails/index|Emails]] (1)\n", "root emails entry"),
        ("- [[records/contacts/index|Contacts]] (1)\n", "root contacts entry"),
    ];
    for (entry, label) in entries {
        assert!(md.contains(entry), "{label}:\n{md}");
    }
    assert!(!md.contains("— "), "root entries carry no preview text");
}
/// Indexing each document incrementally with `Index::on_write` must leave the
/// same artifact set, with identical contents, as writing the same documents
/// and running a single `Index::rebuild_all`.
#[test]
fn on_write_matches_rebuild_byte_for_byte() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();

    // Writes one document into both stores; only the write-through store is
    // indexed immediately.
    let seed = |rel: &str, kind: &str, summary: &str, updated: &str, extra: &str| {
        write_doc(&wt, rel, kind, Some(summary), Some(updated), extra);
        write_doc(&rb, rel, kind, Some(summary), Some(updated), extra);
        Index::on_write(&wt, Path::new(rel)).unwrap();
    };
    seed(
        "sources/emails/2026/05/e1.md",
        "email",
        "First mail",
        "2026-05-01T10:00:00Z",
        "tags:\n - inbox\n",
    );
    seed(
        "sources/emails/2026/06/e2.md",
        "email",
        "Second mail",
        "2026-06-01T10:00:00Z",
        "",
    );
    seed(
        "records/contacts/sarah.md",
        "contact",
        "Sarah",
        "2026-05-15T10:00:00Z",
        "links:\n - wiki/people/sarah\n",
    );
    seed(
        "records/contacts/elena.md",
        "contact",
        "Elena",
        "2026-05-20T10:00:00Z",
        "status: active\n",
    );
    seed(
        "wiki/people/sarah.md",
        "wiki-page",
        "Sarah bio",
        "2026-05-21T10:00:00Z",
        "",
    );

    Index::rebuild_all(&rb).unwrap();

    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    assert_eq!(
        a.keys().collect::<Vec<_>>(),
        b.keys().collect::<Vec<_>>(),
        "same set of index artifacts must exist"
    );
    a.iter().for_each(|(k, v)| {
        assert_eq!(v, &b[k], "artifact {k} differs between write-through and rebuild:\n--- write-through ---\n{v}\n--- rebuild ---\n{}", b[k]);
    });

    // Spot-check that artifacts from every level were actually produced.
    assert!(a.contains_key("index.md"));
    assert!(a.contains_key("sources/emails/index.jsonl"));
    assert!(a.contains_key("records/contacts/index.md"));
}
/// `Index::on_write` for one type folder must not count files from a sibling
/// type folder that has no index of its own yet: the layer and root rollups
/// mention only the written folder. Once the sibling's files are indexed via
/// `on_write` too, the result must equal a full rebuild.
#[test]
fn loop_op_does_not_walk_sibling_content_tree() {
    let (_d, store) = mk_store();

    let companies = [
        ("records/companies/acme.md", "company", "Acme Inc", "2026-05-05T00:00:00Z"),
        ("records/companies/globex.md", "company", "Globex", "2026-05-06T00:00:00Z"),
    ];
    let sarah = ("records/contacts/sarah.md", "contact", "Sarah", "2026-05-15T00:00:00Z");

    // Phase 1: companies exist on disk but are never indexed; only sarah is.
    for (rel, kind, summary, updated) in companies {
        write_doc(&store, rel, kind, Some(summary), Some(updated), "");
    }
    assert!(
        !exists(&store, "records/companies/index.jsonl"),
        "precondition: companies must be un-indexed"
    );
    {
        let (rel, kind, summary, updated) = sarah;
        write_doc(&store, rel, kind, Some(summary), Some(updated), "");
        Index::on_write(&store, Path::new(rel)).unwrap();
    }

    let layer_md = read(&store, "records/index.md");
    let root_md = read(&store, "index.md");
    assert!(
        layer_md.contains("- [[records/contacts/index|Contacts]] (1) — Sarah\n"),
        "layer must reflect the written folder:\n{layer_md}"
    );
    assert!(
        root_md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
        "root must reflect the written folder:\n{root_md}"
    );
    assert!(
        !layer_md.contains("companies"),
        "loop op walked the sibling content tree: layer rollup counts un-indexed records/companies\n{layer_md}"
    );
    assert!(
        !root_md.contains("companies"),
        "loop op walked the sibling content tree: root rollup counts un-indexed records/companies\n{root_md}"
    );
    assert!(
        root_md.contains("## Records (1)"),
        "root layer total must count only the sidecar-indexed folder (1), not walked siblings (would be 3):\n{root_md}"
    );

    // Phase 2: index the sibling's files as well and compare with a rebuild
    // of an identically seeded store.
    let (_d2, rb) = mk_store();
    let everything = [companies[0], companies[1], sarah];
    for (rel, kind, summary, updated) in everything {
        write_doc(&rb, rel, kind, Some(summary), Some(updated), "");
    }
    for (rel, _, _, _) in companies {
        Index::on_write(&store, Path::new(rel)).unwrap();
    }
    Index::rebuild_all(&rb).unwrap();

    let looped = snapshot_artifacts(&store);
    let rebuilt = snapshot_artifacts(&rb);
    assert_eq!(
        looped.keys().collect::<BTreeSet<_>>(),
        rebuilt.keys().collect::<BTreeSet<_>>(),
        "same artifact set after indexing both folders"
    );
    for (k, v) in &looped {
        assert_eq!(
            v, &rebuilt[k],
            "after indexing the sibling too, loop result must equal rebuild for {k}"
        );
    }
    assert!(
        read(&store, "index.md").contains("## Records (3)"),
        "now that both folders are indexed, the root total is 3"
    );
}
/// A wiki page stored at the path computed by `Store::shard_path_for` must be
/// accepted by the index: `type_folder_of` recognises the path, the rebuilt
/// store lists the page in `wiki/topics` (md and jsonl) and in the layer
/// rollup, and `on_write` produces the same artifacts as `rebuild_all`.
#[test]
fn wiki_page_at_shard_path_for_is_indexable_end_to_end() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();

    let frontmatter = crate::parser::Frontmatter::default();
    let rel = wt
        .shard_path_for("wiki-page", &frontmatter, "renewal-theme")
        .unwrap();
    let rel_str = path_to_unix(&rel);
    assert!(
        type_folder_of(&rel).is_some(),
        "shard_path_for produced a path the index cannot file: {rel_str}"
    );

    // Same document in both stores: one indexed incrementally, one rebuilt.
    for target in [&wt, &rb] {
        write_doc(
            target,
            &rel_str,
            "wiki-page",
            Some("Renewal theme"),
            Some("2026-05-21T10:00:00Z"),
            "",
        );
    }
    Index::on_write(&wt, &rel)
        .expect("on_write must succeed for a toolkit-computed wiki-page path");
    Index::rebuild_all(&rb).unwrap();

    // Type-folder level.
    let page_link = wiki_target(&rel);
    let tf_md = read(&rb, "wiki/topics/index.md");
    let wanted_link = format!("[[{page_link}]]");
    assert!(
        tf_md.contains(&wanted_link),
        "type-folder index must list the page link, got:\n{tf_md}"
    );
    assert!(
        exists(&rb, "wiki/topics/index.jsonl"),
        "type-folder jsonl must exist"
    );
    assert!(
        read(&rb, "wiki/topics/index.jsonl").contains(&rel_str),
        "type-folder jsonl must contain the page row"
    );

    // Layer level.
    let layer_md = read(&rb, "wiki/index.md");
    assert!(
        layer_md.contains("wiki/topics/index"),
        "layer index must roll up the wiki/topics type-folder, got:\n{layer_md}"
    );

    // Incremental and full rebuild agree on every artifact.
    let looped = snapshot_artifacts(&wt);
    let swept = snapshot_artifacts(&rb);
    assert_eq!(
        looped.keys().collect::<Vec<_>>(),
        swept.keys().collect::<Vec<_>>(),
        "loop and sweep must produce the same artifact set"
    );
    for (k, v) in &looped {
        assert_eq!(
            v, &swept[k],
            "wiki-page artifact {k} differs between on_write and rebuild"
        );
    }
}
/// Removing the newest file from an over-cap folder via `Index::on_remove`
/// must match a full rebuild of the same state, drop the file from both md and
/// jsonl, decrement the footer count, and pull the next-most-recent file into
/// the capped markdown view.
#[test]
fn on_remove_then_rebuild_match_and_pull_in_next_over_cap() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();
    let total = MD_CAP + 3;

    // `updated` strictly increases with `i` (minutes = i / 60, seconds = i % 60),
    // so `all_rels` is ordered oldest -> newest.
    let mut all_rels = Vec::with_capacity(total);
    for i in 0..total {
        let rel = format!("sources/emails/2026/05/m-{i:04}.md");
        let updated = format!("2026-05-10T00:{:02}:{:02}Z", i / 60, i % 60);
        let summary = format!("mail {i}");
        write_doc(&wt, &rel, "email", Some(&summary), Some(&updated), "");
        write_doc(&rb, &rel, "email", Some(&summary), Some(&updated), "");
        all_rels.push(rel);
    }
    Index::rebuild_all(&wt).unwrap();

    // Write-through store: delete + on_remove. Rebuild store: delete + rebuild.
    let newest = &all_rels[total - 1];
    fs::remove_file(wt.root.join(newest)).unwrap();
    Index::on_remove(&wt, Path::new(newest)).unwrap();
    fs::remove_file(rb.root.join(newest)).unwrap();
    Index::rebuild_all(&rb).unwrap();

    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    // Compare key sets first: iterating `a` alone would not notice an artifact
    // that exists only in the rebuilt store.
    assert_eq!(
        a.keys().collect::<Vec<_>>(),
        b.keys().collect::<Vec<_>>(),
        "on_remove and rebuild must produce the same artifact set"
    );
    for (k, v) in &a {
        assert_eq!(v, &b[k], "after remove, artifact {k} drifted from rebuild");
    }

    let md = read(&wt, "sources/emails/index.md");
    assert_eq!(md.lines().filter(|l| l.starts_with("- [[")).count(), MD_CAP);
    assert!(
        !md.contains(&format!("[[{}]]", wiki_target(Path::new(newest)))),
        "removed file must not be listed in md"
    );

    // `total - 1` files remain; the `MD_CAP` most recent start at this index,
    // which sat just outside the cap before the removal.
    let pulled_in = &all_rels[total - 1 - MD_CAP];
    assert!(
        md.contains(&format!("[[{}]]", wiki_target(Path::new(pulled_in)))),
        "the 501st-most-recent must be pulled into the browse view after a removal"
    );
    assert!(
        md.contains(&format!("This folder has {} files.", total - 1)),
        "footer count must decrement:\n{}",
        md.lines().rev().take(4).collect::<Vec<_>>().join("\n")
    );

    let jsonl = read(&wt, "sources/emails/index.jsonl");
    assert_eq!(
        jsonl.lines().count(),
        total - 1,
        "jsonl loses exactly the removed file"
    );
    assert!(
        !jsonl.contains(&path_to_unix(Path::new(newest))),
        "removed file must be gone from the jsonl too"
    );
}
/// Moving a file from one type folder to another and reporting it through
/// `Index::on_rename` must yield the same artifacts as a full rebuild, with
/// the entry gone from the source folder's index and present in the
/// destination's.
#[test]
fn on_rename_cross_folder_matches_rebuild() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();

    for (rel, kind, summary, updated) in [
        ("records/contacts/a.md", "contact", "A", "2026-05-01T00:00:00Z"),
        ("records/contacts/b.md", "contact", "B", "2026-05-02T00:00:00Z"),
        ("records/companies/x.md", "company", "X", "2026-05-03T00:00:00Z"),
    ] {
        write_doc(&wt, rel, kind, Some(summary), Some(updated), "");
        write_doc(&rb, rel, kind, Some(summary), Some(updated), "");
    }
    // Only the write-through store is indexed before the move.
    Index::rebuild_all(&wt).unwrap();

    let (old, new) = ("records/contacts/b.md", "records/companies/b.md");
    let dest_dir = "records/companies";

    // Write-through store: move, then incremental rename hook.
    fs::create_dir_all(wt.root.join(dest_dir)).unwrap();
    fs::rename(wt.root.join(old), wt.root.join(new)).unwrap();
    Index::on_rename(&wt, Path::new(old), Path::new(new)).unwrap();

    // Reference store: same move, then a rebuild from scratch.
    fs::create_dir_all(rb.root.join(dest_dir)).unwrap();
    fs::rename(rb.root.join(old), rb.root.join(new)).unwrap();
    Index::rebuild_all(&rb).unwrap();

    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    assert_eq!(a.keys().collect::<Vec<_>>(), b.keys().collect::<Vec<_>>());
    a.iter().for_each(|(k, v)| {
        assert_eq!(v, &b[k], "rename: artifact {k} drifted from rebuild");
    });

    let contacts = read(&wt, "records/contacts/index.md");
    assert!(!contacts.contains("records/contacts/b]]"));
    let companies = read(&wt, "records/companies/index.md");
    assert!(companies.contains("[[records/companies/b]]"));
}
/// Calling `Index::on_write` twice for the same path replaces the existing
/// entry: the jsonl keeps a single line with the new summary, and the
/// markdown entry and index `updated` reflect the revision.
#[test]
fn on_write_updates_existing_entry_in_place() {
    let (_d, store) = mk_store();
    let rel = "records/contacts/a.md";

    // Write and index the original, then overwrite and index the revision.
    let revisions = [
        ("Original", "2026-05-01T00:00:00Z"),
        ("Revised", "2026-05-09T00:00:00Z"),
    ];
    for (summary, updated) in revisions {
        write_doc(&store, rel, "contact", Some(summary), Some(updated), "");
        Index::on_write(&store, Path::new(rel)).unwrap();
    }

    let jsonl = read(&store, "records/contacts/index.jsonl");
    assert_eq!(
        jsonl.lines().count(),
        1,
        "upsert must not duplicate the line"
    );
    assert!(jsonl.contains("Revised"), "jsonl must reflect the update");
    assert!(
        !jsonl.contains("Original"),
        "stale line must be gone (compacted)"
    );

    let md = read(&store, "records/contacts/index.md");
    assert!(md.contains("- [[records/contacts/a]] — Revised\n"));
    assert!(
        md.contains("updated: 2026-05-09T00:00:00Z\n"),
        "index updated must track the newer member"
    );
}
/// `Index::render_dry_run` returns the would-be md and jsonl contents, each
/// introduced by a `--- <path> ---` separator line, and creates no index
/// files on disk.
#[test]
fn dry_run_emits_separators_and_writes_nothing() {
    let (_d, store) = mk_store();
    write_doc(
        &store,
        "sources/emails/2026/05/a.md",
        "email",
        Some("Mail"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    let level = IndexLevel::TypeFolder("sources/emails".into());
    let out = Index::render_dry_run(&store, &level).unwrap();

    for (separator, label) in [
        ("--- sources/emails/index.md ---\n", "md separator"),
        ("--- sources/emails/index.jsonl ---\n", "jsonl separator"),
    ] {
        assert!(out.contains(separator), "{label}:\n{out}");
    }
    assert!(
        out.contains("- [[sources/emails/2026/05/a]] — Mail"),
        "md body present"
    );

    // Neither artifact may have been written to disk.
    for artifact in ["sources/emails/index.md", "sources/emails/index.jsonl"] {
        assert!(!exists(&store, artifact), "dry-run must not write");
    }
}
/// `Index::cleanup` deletes index files sitting inside a shard directory and
/// index files in a folder with no documents, while leaving real content
/// untouched.
#[test]
fn cleanup_removes_noncanonical_and_empty_indexes() {
    let (_d, store) = mk_store();
    write_doc(
        &store,
        "sources/emails/2026/05/a.md",
        "email",
        Some("Mail"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    // Plant stale index files inside the shard directory...
    for stale in [
        "sources/emails/2026/05/index.md",
        "sources/emails/2026/05/index.jsonl",
    ] {
        fs::write(store.root.join(stale), "stale\n").unwrap();
    }
    // ...and one in a folder that holds no documents at all.
    fs::create_dir_all(store.root.join("records/empty")).unwrap();
    fs::write(store.root.join("records/empty/index.md"), "stale\n").unwrap();

    Index::cleanup(&store).unwrap();

    for (gone, why) in [
        ("sources/emails/2026/05/index.md", "shard index must be deleted"),
        ("sources/emails/2026/05/index.jsonl", "shard jsonl must be deleted"),
        ("records/empty/index.md", "empty-folder index must be deleted"),
    ] {
        assert!(!exists(&store, gone), "{why}");
    }
    assert!(exists(&store, "sources/emails/2026/05/a.md"));
}
/// When the only document is deleted, the next `Index::rebuild_all` removes
/// the type-folder, layer, and root indexes that the first rebuild created.
#[test]
fn rebuild_deletes_stale_indexes_for_emptied_folders() {
    let (_d, store) = mk_store();
    let only_doc = "records/contacts/a.md";
    write_doc(
        &store,
        only_doc,
        "contact",
        Some("A"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    // First rebuild: indexes exist at all three levels.
    Index::rebuild_all(&store).unwrap();
    assert!(exists(&store, "records/contacts/index.md"));
    assert!(exists(&store, "records/index.md"));
    assert!(exists(&store, "index.md"));

    // Empty the store and rebuild again: every level's index must disappear.
    fs::remove_file(store.root.join(only_doc)).unwrap();
    Index::rebuild_all(&store).unwrap();
    for (stale, why) in [
        ("records/contacts/index.md", "emptied type-folder index gone"),
        ("records/index.md", "now-empty layer index gone"),
        ("index.md", "now-empty root index gone"),
    ] {
        assert!(!exists(&store, stale), "{why}");
    }
}
/// Property-style check: a deterministic pseudo-random sequence of writes,
/// removes, and renames applied incrementally (`on_write` / `on_remove` /
/// `on_rename`) to one store must leave exactly the artifacts that a single
/// `rebuild_all` produces on a second store that received the same file
/// operations.
#[test]
fn property_writethrough_equals_rebuild_under_mixed_ops() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();

    // Fixed-seed linear congruential generator, so every run replays the same
    // operation sequence. The order of `next()` calls below is part of that
    // sequence.
    let mut seed: u64 = 0x9E3779B97F4A7C15;
    let mut next = || {
        seed = seed
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        (seed >> 33) as u32
    };

    // `folders[i]` holds documents of `types[i]`.
    let folders = ["sources/emails", "records/contacts", "wiki/people"];
    let types = ["email", "contact", "wiki-page"];
    // Paths currently present on disk in both stores.
    let mut live: Vec<String> = Vec::new();

    for step in 0..120u32 {
        let r = next();
        let op = r % 10;
        if op < 6 || live.is_empty() {
            // Write (create or overwrite); also forced while nothing is live.
            let fi = (next() as usize) % folders.len();
            let folder = folders[fi];
            let id = next() % 40;
            let rel = if folder == "sources/emails" {
                // Emails are sharded by year/month; spread them over two months.
                let month = 5 + (id % 2);
                format!("{folder}/2026/{month:02}/f-{id:02}.md")
            } else {
                format!("{folder}/f-{id:02}.md")
            };
            let updated = format!(
                "2026-05-{:02}T{:02}:{:02}:00Z",
                1 + (step % 27),
                step % 24,
                id % 60
            );
            let extra = if id % 3 == 0 {
                "tags:\n - x\n - y\n"
            } else {
                ""
            };
            write_doc(
                &wt,
                &rel,
                types[fi],
                Some(&format!("sum {step}")),
                Some(&updated),
                extra,
            );
            write_doc(
                &rb,
                &rel,
                types[fi],
                Some(&format!("sum {step}")),
                Some(&updated),
                extra,
            );
            Index::on_write(&wt, Path::new(&rel)).unwrap();
            if !live.contains(&rel) {
                live.push(rel);
            }
        } else if op < 8 {
            // Remove a random live file from both stores.
            let idx = (next() as usize) % live.len();
            let rel = live.remove(idx);
            fs::remove_file(wt.root.join(&rel)).unwrap();
            // Both stores receive identical file operations, so a failure here
            // means the fixtures diverged; fail loudly instead of ignoring it.
            fs::remove_file(rb.root.join(&rel))
                .expect("rebuild store must mirror the write-through store");
            Index::on_remove(&wt, Path::new(&rel)).unwrap();
        } else {
            // Rename a random live file, possibly into another folder. Target
            // ids start at 50, so they never collide with written ids (< 40).
            let idx = (next() as usize) % live.len();
            let old = live[idx].clone();
            let fi = (next() as usize) % folders.len();
            let folder = folders[fi];
            let id = 50 + (next() % 40);
            let new = if folder == "sources/emails" {
                format!("{folder}/2026/05/f-{id:02}.md")
            } else {
                format!("{folder}/f-{id:02}.md")
            };
            // Skip no-op renames and renames onto another live file.
            if new == old || live.contains(&new) {
                continue;
            }
            fs::create_dir_all(wt.root.join(&new).parent().unwrap()).unwrap();
            fs::create_dir_all(rb.root.join(&new).parent().unwrap()).unwrap();
            fs::rename(wt.root.join(&old), wt.root.join(&new)).unwrap();
            fs::rename(rb.root.join(&old), rb.root.join(&new)).unwrap();
            Index::on_rename(&wt, Path::new(&old), Path::new(&new)).unwrap();
            live[idx] = new;
        }
    }

    Index::rebuild_all(&rb).unwrap();
    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    assert_eq!(
        a.keys().collect::<BTreeSet<_>>(),
        b.keys().collect::<BTreeSet<_>>(),
        "write-through and rebuild must produce the same set of artifacts"
    );
    for (k, v) in &a {
        assert_eq!(
            v, &b[k],
            "INVARIANT VIOLATED: artifact {k} differs after mixed ops\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
            b[k]
        );
    }
    assert!(
        !a.is_empty(),
        "the run must have produced at least one artifact"
    );
}
}