use std::collections::BTreeMap;
use std::fs;
use std::io::Write as _;
use std::path::{Path, PathBuf};
use chrono::{DateTime, FixedOffset, SecondsFormat};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use crate::store::{Layer, Store};
/// Maximum number of records listed in a type-folder `index.md`; when a folder
/// holds more, the rendered page ends with a "More" footer.
const MD_CAP: usize = 500;
/// Placeholder stored as a record's summary when the frontmatter has no usable
/// `summary` value. Layer previews skip summaries equal to this text.
const MISSING_SUMMARY: &str = "(no summary)";
/// Text of the top-level heading in the root `index.md`.
const ROOT_TITLE: &str = "Knowledge base index";
/// The level of the store hierarchy that an [`Index`] describes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexLevel {
/// The store root.
Root,
/// One layer directory.
Layer(Layer),
/// One type folder, given as a path that is joined onto the store root.
TypeFolder(PathBuf),
}
/// One catalog entry: the indexed frontmatter of a single document file.
///
/// Each record is serialized as one JSON object per line of `index.jsonl`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndexRecord {
/// Document path; serialized as a `/`-joined string by `path_serde`.
#[serde(with = "path_serde")]
pub path: PathBuf,
/// Document type; serialized under the key `type`.
#[serde(rename = "type")]
pub type_: String,
/// Summary text (or the missing-summary placeholder).
pub summary: String,
/// Tags; defaults to empty when the key is absent on deserialization.
#[serde(default)]
pub tags: Vec<String>,
/// Links; defaults to empty when the key is absent on deserialization.
#[serde(default)]
pub links: Vec<String>,
/// Creation timestamp, if one was present and parseable.
pub created: Option<DateTime<FixedOffset>>,
/// Last-update timestamp, if one was present and parseable; drives recency ordering.
pub updated: Option<DateTime<FixedOffset>>,
/// Every other frontmatter key, flattened into the same JSON object.
#[serde(flatten)]
pub fields: BTreeMap<String, Value>,
}
/// An in-memory index for one level of the store.
///
/// The builders in this file fill `records` only for type-folder indexes and
/// `child_counts` only for layer and root indexes.
#[derive(Debug, Clone, PartialEq)]
pub struct Index {
/// Which level this index describes.
pub level: IndexLevel,
/// Records of the documents in a type folder.
pub records: Vec<IndexRecord>,
/// Number of document files per type folder, keyed by the folder path.
pub child_counts: BTreeMap<PathBuf, usize>,
}
impl Index {
/// Builds the index of one type folder from the documents on disk.
///
/// Every file returned by `walk_type_folder_files` is turned into a record
/// from its frontmatter; the records are then put in recency order.
///
/// # Errors
/// Returns the first error produced while reading a document.
///
/// # Panics
/// Panics if a walked file is not located under `store.root`.
pub fn build_type_folder(store: &Store, type_folder: &Path) -> crate::Result<Index> {
    let folder_rel = normalize_rel(type_folder);
    let folder_abs = store.root.join(&folder_rel);
    let mut records = walk_type_folder_files(&folder_abs)
        .into_iter()
        .map(|doc_abs| {
            let doc_rel = rel_to_store(&store.root, &doc_abs)
                .expect("walked file is under the store root");
            record_from_file(&doc_abs, doc_rel)
        })
        .collect::<crate::Result<Vec<_>>>()?;
    sort_records(&mut records);
    Ok(Index {
        level: IndexLevel::TypeFolder(folder_rel),
        records,
        child_counts: BTreeMap::new(),
    })
}
pub fn build_layer(store: &Store, layer: Layer) -> crate::Result<Index> {
let mut child_counts = BTreeMap::new();
for tf in type_folders_in_layer(store, layer) {
let abs = store.root.join(&tf);
let n = walk_type_folder_files(&abs).len();
if n > 0 {
child_counts.insert(tf, n);
}
}
Ok(Index {
level: IndexLevel::Layer(layer),
records: Vec::new(),
child_counts,
})
}
/// Builds the root index: a document count for every non-empty type folder
/// across all layers.
pub fn build_root(store: &Store) -> crate::Result<Index> {
    let mut child_counts = BTreeMap::new();
    for layer in Layer::all() {
        let counted = type_folders_in_layer(store, layer)
            .into_iter()
            .filter_map(|folder| {
                let file_count = walk_type_folder_files(&store.root.join(&folder)).len();
                (file_count > 0).then_some((folder, file_count))
            });
        child_counts.extend(counted);
    }
    Ok(Index {
        level: IndexLevel::Root,
        records: Vec::new(),
        child_counts,
    })
}
/// Renders this index as the markdown page for its level.
pub fn to_markdown(&self) -> String {
    match &self.level {
        IndexLevel::Root => self.render_root_md(),
        IndexLevel::Layer(which) => self.render_layer_md(*which),
        IndexLevel::TypeFolder(path) => self.render_type_folder_md(path),
    }
}
/// Serializes the records as JSON Lines: one JSON object per record, each
/// followed by a newline.
///
/// # Panics
/// Panics if a record fails to serialize.
pub fn to_jsonl(&self) -> String {
    self.records.iter().fold(String::new(), |mut acc, record| {
        let encoded = serde_json::to_string(record).expect("IndexRecord serializes");
        acc.push_str(&encoded);
        acc.push('\n');
        acc
    })
}
/// Renders the `index.md` page of a type folder.
///
/// The page has a frontmatter header (with `updated` set to the newest record
/// timestamp when one exists), a heading, and one bullet per record, capped at
/// `MD_CAP` entries. When records were cut off, a "More" footer is appended.
fn render_type_folder_md(&self, folder: &Path) -> String {
    let folder_disp = path_to_unix(folder);
    let mut page = String::new();
    for fixed in ["---\n", "type: index\n", "scope: type-folder\n"] {
        page.push_str(fixed);
    }
    page.push_str(&format!("folder: {folder_disp}\n"));
    if let Some(ts) = max_updated(self.records.iter().map(|r| r.updated.as_ref())) {
        page.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
    }
    page.push_str("---\n\n");
    page.push_str(&format!("# {folder_disp}\n\n"));

    for entry in self.records.iter().take(MD_CAP) {
        page.push_str(&format_md_entry(entry));
        page.push('\n');
    }

    let total = self.records.len();
    if total > MD_CAP {
        // The footer names the type of the first record and the first path
        // segment of the folder.
        let first_type = self.records.first().map_or("", |r| r.type_.as_str());
        let layer_name = folder
            .components()
            .next()
            .and_then(|c| c.as_os_str().to_str())
            .unwrap_or("");
        page.push('\n');
        page.push_str(&more_footer(total, first_type, layer_name));
    }
    page
}
/// Renders the layer page from `child_counts` alone: a frontmatter header, a
/// heading, and one wiki-link bullet with a count per type folder.
fn render_layer_md(&self, layer: Layer) -> String {
    let layer_dir = layer_dir_name(layer);
    let mut page = String::new();
    for fixed in ["---\n", "type: index\n", "scope: layer\n"] {
        page.push_str(fixed);
    }
    page.push_str(&format!("folder: {layer_dir}\n"));
    page.push_str("---\n\n");
    page.push_str(&format!("# {layer_dir}\n\n"));
    page.extend(self.child_counts.iter().map(|(tf, n)| {
        let tf_unix = path_to_unix(tf);
        let display = capitalize(folder_basename(tf));
        format!("- [[{tf_unix}/index|{display}]] ({n})\n")
    }));
    page
}
fn render_root_md(&self) -> String {
let mut s = String::new();
s.push_str("---\n");
s.push_str("type: index\n");
s.push_str("scope: root\n");
s.push_str("---\n\n");
s.push_str(&format!("# {ROOT_TITLE}\n"));
for layer in Layer::all() {
let layer_dir = layer_dir_name(layer);
let prefix = format!("{layer_dir}/");
let children: Vec<(&PathBuf, &usize)> = self
.child_counts
.iter()
.filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
.collect();
if children.is_empty() {
continue;
}
let total: usize = children.iter().map(|(_, n)| **n).sum();
s.push('\n');
s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
for (tf, n) in children {
let tf_unix = path_to_unix(tf);
let display = capitalize(folder_basename(tf));
s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
}
}
s
}
}
impl Index {
/// Updates the indexes after `file` was created or modified.
///
/// Index artifacts themselves are ignored. Otherwise the file's record
/// replaces any existing record with the same path in its type folder's
/// `index.jsonl`, the folder artifacts are rewritten, and the layer and root
/// pages are refreshed.
///
/// # Errors
/// Fails when the file is not inside a layer/type-folder, when its
/// frontmatter cannot be read, or when an index file cannot be read or written.
pub fn on_write(store: &Store, file: &Path) -> crate::Result<()> {
    let rel = normalize_rel(file);
    if is_index_artifact(&rel) {
        return Ok(());
    }
    let Some(folder) = type_folder_of(&rel) else {
        return Err(bad_index(&rel, "file is not inside a layer/type-folder"));
    };
    let doc_abs = store.root.join(&rel);
    let fresh = record_from_file(&doc_abs, rel)?;

    let folder_abs = store.root.join(&folder);
    let _lock = FolderLock::acquire(&folder_abs);
    let mut records = read_jsonl_records(&folder_abs.join("index.jsonl"))?;
    records.retain(|existing| existing.path != fresh.path);
    records.push(fresh);
    sort_records(&mut records);
    write_type_folder_artifacts(store, &folder, &records)?;
    update_parents(store, &folder)?;
    Ok(())
}
/// Updates the indexes after a document was moved from `old` to `new`.
///
/// Renames that involve an index artifact on either side are ignored. The
/// record for `old` is dropped from its type folder and a record built from
/// the file at `new` is added to the target type folder (which may be the
/// same folder). Layer and root pages of the affected folders are refreshed.
///
/// All fallible reads (both `index.jsonl` files and the renamed document)
/// happen before the first artifact is written, so a read or parse failure
/// leaves the existing indexes untouched.
///
/// # Errors
/// Fails when either path is not inside a layer/type-folder, when the file at
/// `new` cannot be read, or when an index file cannot be read or written.
pub fn on_rename(store: &Store, old: &Path, new: &Path) -> crate::Result<()> {
    let old_rel = normalize_rel(old);
    let new_rel = normalize_rel(new);
    if is_index_artifact(&old_rel) || is_index_artifact(&new_rel) {
        return Ok(());
    }
    let old_folder = type_folder_of(&old_rel)
        .ok_or_else(|| bad_index(&old_rel, "source is not inside a layer/type-folder"))?;
    let new_folder = type_folder_of(&new_rel)
        .ok_or_else(|| bad_index(&new_rel, "target is not inside a layer/type-folder"))?;
    let _locks = lock_folders(store, &old_folder, &new_folder);

    // Gather every input first; nothing has been written yet if any of these fail.
    let mut old_records =
        read_jsonl_records(&store.root.join(&old_folder).join("index.jsonl"))?;
    let new_abs = store.root.join(&new_rel);
    let record = record_from_file(&new_abs, new_rel)?;
    old_records.retain(|r| r.path != old_rel);

    if old_folder == new_folder {
        // Same folder: a single artifact set receives both the removal and the addition.
        old_records.retain(|r| r.path != record.path);
        old_records.push(record);
        sort_records(&mut old_records);
        write_type_folder_artifacts(store, &old_folder, &old_records)?;
        update_parents(store, &old_folder)?;
        return Ok(());
    }

    let mut new_records =
        read_jsonl_records(&store.root.join(&new_folder).join("index.jsonl"))?;
    new_records.retain(|r| r.path != record.path);
    new_records.push(record);
    sort_records(&mut new_records);
    sort_records(&mut old_records);

    write_type_folder_artifacts(store, &old_folder, &old_records)?;
    write_type_folder_artifacts(store, &new_folder, &new_records)?;
    update_parents(store, &old_folder)?;
    update_parents(store, &new_folder)?;
    Ok(())
}
/// Updates the indexes after `file` was deleted.
///
/// Index artifacts themselves are ignored. Otherwise the record whose path
/// equals `file` is dropped from its type folder's `index.jsonl`, the folder
/// artifacts are rewritten (or removed when no records remain), and the layer
/// and root pages are refreshed. The artifacts are rewritten even when no
/// record matched.
///
/// # Errors
/// Fails when the file is not inside a layer/type-folder or when an index
/// file cannot be read or written.
pub fn on_remove(store: &Store, file: &Path) -> crate::Result<()> {
    let file_rel = normalize_rel(file);
    if is_index_artifact(&file_rel) {
        return Ok(());
    }
    let folder = type_folder_of(&file_rel)
        .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
    let folder_abs = store.root.join(&folder);
    let _lock = FolderLock::acquire(&folder_abs);
    let mut records = read_jsonl_records(&folder_abs.join("index.jsonl"))?;
    records.retain(|r| r.path != file_rel);
    sort_records(&mut records);
    write_type_folder_artifacts(store, &folder, &records)?;
    update_parents(store, &folder)?;
    Ok(())
}
/// Rebuilds every index in the store from the documents on disk.
///
/// Runs [`Index::cleanup`] first, then rewrites the artifacts of every
/// non-empty type folder, each layer page, and finally the root page. Layer
/// and root pages are removed instead when they would have no children.
pub fn rebuild_all(store: &Store) -> crate::Result<()> {
    Index::cleanup(store)?;

    for layer in Layer::all() {
        for folder in type_folders_in_layer(store, layer) {
            let built = Index::build_type_folder(store, &folder)?;
            if !built.records.is_empty() {
                write_type_folder_artifacts(store, &folder, &built.records)?;
            }
        }

        let layer_index = Index::build_layer(store, layer)?;
        let layer_page = store.root.join(layer_dir_name(layer)).join("index.md");
        if !layer_index.child_counts.is_empty() {
            write_atomic(&layer_page, render_layer_md_with_store(store, &layer_index))?;
        } else {
            remove_if_exists(&layer_page)?;
        }
    }

    let root_index = Index::build_root(store)?;
    let root_page = store.root.join("index.md");
    if !root_index.child_counts.is_empty() {
        write_atomic(&root_page, render_root_md_with_store(store, &root_index))?;
    } else {
        remove_if_exists(&root_page)?;
    }
    Ok(())
}
pub fn rebuild_folder(store: &Store, folder: &Path) -> crate::Result<()> {
Self::write_level(store, &IndexLevel::TypeFolder(folder.to_path_buf()))?;
update_parents(store, folder)
}
pub fn write_level(store: &Store, level: &IndexLevel) -> crate::Result<()> {
match level {
IndexLevel::TypeFolder(folder) => {
let idx = Index::build_type_folder(store, folder)?;
if idx.records.is_empty() {
remove_if_exists(&store.root.join(folder).join("index.md"))?;
remove_if_exists(&store.root.join(folder).join("index.jsonl"))?;
} else {
write_type_folder_artifacts(store, folder, &idx.records)?;
}
}
IndexLevel::Layer(layer) => {
let idx = Index::build_layer(store, *layer)?;
let p = store.root.join(layer_dir_name(*layer)).join("index.md");
if idx.child_counts.is_empty() {
remove_if_exists(&p)?;
} else {
write_atomic(&p, render_layer_md_with_store(store, &idx))?;
}
}
IndexLevel::Root => {
let idx = Index::build_root(store)?;
let p = store.root.join("index.md");
if idx.child_counts.is_empty() {
remove_if_exists(&p)?;
} else {
write_atomic(&p, render_root_md_with_store(store, &idx))?;
}
}
}
Ok(())
}
/// Renders what would be written for `level` without touching the disk.
///
/// Each artifact is preceded by a `--- <path> ---` banner line. Type folders
/// produce both the markdown page and the JSONL catalog; layers and the root
/// produce only the markdown page.
pub fn render_dry_run(store: &Store, level: &IndexLevel) -> crate::Result<String> {
    let rendered = match level {
        IndexLevel::Root => {
            let idx = Index::build_root(store)?;
            let mut out = String::from("--- index.md ---\n");
            out.push_str(&render_root_md_with_store(store, &idx));
            out
        }
        IndexLevel::Layer(layer) => {
            let idx = Index::build_layer(store, *layer)?;
            let md_path = format!("{}/index.md", layer_dir_name(*layer));
            let mut out = format!("--- {md_path} ---\n");
            out.push_str(&render_layer_md_with_store(store, &idx));
            out
        }
        IndexLevel::TypeFolder(folder) => {
            let idx = Index::build_type_folder(store, folder)?;
            let md_path = path_to_unix(&folder.join("index.md"));
            let jsonl_path = path_to_unix(&folder.join("index.jsonl"));
            [
                format!("--- {md_path} ---\n"),
                idx.to_markdown(),
                format!("--- {jsonl_path} ---\n"),
                idx.to_jsonl(),
            ]
            .concat()
        }
    };
    Ok(rendered)
}
/// Removes index artifacts that should not exist.
///
/// For every type folder of every existing layer directory this deletes
/// (a) deletable index artifacts found below the folder's top level and
/// (b) the folder's own `index.md` / `index.jsonl` when the folder contains no
/// document files. An `index.md` is only deleted when
/// `is_deletable_catalog_artifact` accepts it.
pub fn cleanup(store: &Store) -> crate::Result<()> {
for layer in Layer::all() {
let layer_dir = store.root.join(layer_dir_name(layer));
if !layer_dir.is_dir() {
continue;
}
for tf in type_folders_in_layer(store, layer) {
let tf_abs = store.root.join(&tf);
// min_depth(2) skips the folder itself and its direct children, so only
// artifacts inside nested subdirectories are considered here.
for entry in walkdir::WalkDir::new(&tf_abs)
.min_depth(2)
.into_iter()
.filter_map(|e| e.ok())
{
let p = entry.path();
if is_index_artifact(p) && is_deletable_catalog_artifact(p) {
remove_if_exists(p)?;
}
}
// A type folder without document files keeps no top-level artifacts.
if walk_type_folder_files(&tf_abs).is_empty() {
let md = tf_abs.join("index.md");
if is_deletable_catalog_artifact(&md) {
remove_if_exists(&md)?;
}
remove_if_exists(&tf_abs.join("index.jsonl"))?;
}
}
}
Ok(())
}
}
/// Writes `index.md` and `index.jsonl` for a type folder from `records`.
///
/// With no records both files are removed instead (a missing file is not an
/// error).
fn write_type_folder_artifacts(
    store: &Store,
    folder: &Path,
    records: &[IndexRecord],
) -> crate::Result<()> {
    let dir = store.root.join(folder);
    let (md_target, jsonl_target) = (dir.join("index.md"), dir.join("index.jsonl"));

    if records.is_empty() {
        remove_if_exists(&md_target)?;
        return remove_if_exists(&jsonl_target);
    }

    let index = Index {
        level: IndexLevel::TypeFolder(folder.to_path_buf()),
        records: records.to_vec(),
        child_counts: BTreeMap::new(),
    };
    write_atomic(&md_target, index.to_markdown())?;
    write_atomic(&jsonl_target, index.to_jsonl())
}
/// Refreshes the layer page that owns `folder` and the root page.
///
/// Statistics come from the `index.jsonl` files of all layers. The layer page
/// is only touched when the first segment of `folder` names a known layer. A
/// page is removed when nothing is left to list on it.
fn update_parents(store: &Store, folder: &Path) -> crate::Result<()> {
    let stats = collect_child_stats(store, &Layer::all())?;

    let first_segment = folder
        .components()
        .next()
        .and_then(|c| c.as_os_str().to_str());
    if let Some(layer) = first_segment.and_then(layer_from_dir_name) {
        let layer_page = store.root.join(layer_dir_name(layer)).join("index.md");
        if !layer_has_children(&stats, layer) {
            remove_if_exists(&layer_page)?;
        } else {
            write_atomic(&layer_page, render_layer_md_from_stats(layer, &stats))?;
        }
    }

    let root_page = store.root.join("index.md");
    let nothing_indexed = stats.values().all(|s| s.count == 0);
    if nothing_indexed {
        remove_if_exists(&root_page)?;
    } else {
        write_atomic(&root_page, render_root_md_from_stats(&stats))?;
    }
    Ok(())
}
/// Returns `true` when `stats` holds at least one non-empty type folder whose
/// path starts with `<layer dir>/`.
fn layer_has_children(stats: &BTreeMap<PathBuf, FolderStat>, layer: Layer) -> bool {
    let layer_prefix = format!("{}/", layer_dir_name(layer));
    for (folder, stat) in stats {
        if stat.count > 0 && path_to_unix(folder).starts_with(&layer_prefix) {
            return true;
        }
    }
    false
}
/// Renders a layer page from pre-collected folder statistics.
///
/// Each non-empty type folder of the layer gets one bullet with its record
/// count and, when the newest record has a real summary, a preview of that
/// summary cut to 80 characters. The header's `updated` is the latest
/// `updated` among the folders' newest records.
fn render_layer_md_from_stats(layer: Layer, stats: &BTreeMap<PathBuf, FolderStat>) -> String {
    let layer_dir = layer_dir_name(layer);
    let layer_prefix = format!("{layer_dir}/");
    let mut latest: Option<DateTime<FixedOffset>> = None;
    let mut bullets = String::new();

    let in_layer = stats
        .iter()
        .filter(|(tf, stat)| stat.count != 0 && path_to_unix(tf).starts_with(&layer_prefix));
    for (tf, stat) in in_layer {
        let head = stat.newest.as_ref();
        if let Some(u) = head.and_then(|r| r.updated) {
            // Keep the current value on ties.
            if latest.is_none_or(|cur| cur < u) {
                latest = Some(u);
            }
        }
        let tf_unix = path_to_unix(tf);
        let display = capitalize(folder_basename(tf));
        let preview = head
            .map(|r| truncate(&r.summary, 80))
            .filter(|p| !p.is_empty() && p != MISSING_SUMMARY);
        let bullet = if let Some(p) = preview {
            format!(
                "- [[{tf_unix}/index|{display}]] ({}) — {p}\n",
                stat.count
            )
        } else {
            format!(
                "- [[{tf_unix}/index|{display}]] ({})\n",
                stat.count
            )
        };
        bullets.push_str(&bullet);
    }

    let mut page = String::new();
    for fixed in ["---\n", "type: index\n", "scope: layer\n"] {
        page.push_str(fixed);
    }
    page.push_str(&format!("folder: {layer_dir}\n"));
    if let Some(ts) = latest {
        page.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
    }
    page.push_str("---\n\n");
    page.push_str(&format!("# {layer_dir}\n\n"));
    page.push_str(&bullets);
    page
}
/// Renders the root page from pre-collected folder statistics.
///
/// The header's `updated` is the latest `updated` among the newest records of
/// all non-empty folders. The body has one section per layer that has
/// non-empty folders, listing each folder with its record count.
fn render_root_md_from_stats(stats: &BTreeMap<PathBuf, FolderStat>) -> String {
    let latest = stats
        .values()
        .filter(|stat| stat.count != 0)
        .filter_map(|stat| stat.newest.as_ref().and_then(|r| r.updated))
        .fold(None, |best: Option<DateTime<FixedOffset>>, u| match best {
            Some(cur) if cur >= u => Some(cur),
            _ => Some(u),
        });

    let mut page = String::new();
    for fixed in ["---\n", "type: index\n", "scope: root\n"] {
        page.push_str(fixed);
    }
    if let Some(ts) = latest {
        page.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
    }
    page.push_str("---\n\n");
    page.push_str(&format!("# {ROOT_TITLE}\n"));

    for layer in Layer::all() {
        let layer_dir = layer_dir_name(layer);
        let prefix = format!("{layer_dir}/");
        let mut total = 0usize;
        let mut bullets = String::new();
        for (tf, stat) in stats {
            let tf_unix = path_to_unix(tf);
            if stat.count == 0 || !tf_unix.starts_with(&prefix) {
                continue;
            }
            let n = stat.count;
            total += n;
            let display = capitalize(folder_basename(tf));
            bullets.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
        }
        // Layers with nothing to list get no section.
        if bullets.is_empty() {
            continue;
        }
        page.push('\n');
        page.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
        page.push_str(&bullets);
    }
    page
}
/// Renders a layer page for a layer-level [`Index`], enriching each folder
/// bullet with data read from that folder's `index.jsonl`.
///
/// A folder whose `index.jsonl` cannot be read is treated as having no
/// records: it still gets a bullet with its count, but no preview and no
/// contribution to `updated`.
///
/// # Panics
/// Panics when `idx` is not a layer index.
fn render_layer_md_with_store(store: &Store, idx: &Index) -> String {
    let IndexLevel::Layer(layer) = idx.level else {
        unreachable!("render_layer_md_with_store called on non-layer")
    };
    let layer_dir = layer_dir_name(layer);
    let mut latest: Option<DateTime<FixedOffset>> = None;
    let mut bullets = String::new();

    for (tf, n) in &idx.child_counts {
        let catalog = store.root.join(tf).join("index.jsonl");
        let recs = read_jsonl_records(&catalog).unwrap_or_default();
        // Records come back in recency order, so the first one is the newest.
        let head = recs.first();
        if let Some(u) = head.and_then(|r| r.updated) {
            if latest.is_none_or(|cur| cur < u) {
                latest = Some(u);
            }
        }
        let tf_unix = path_to_unix(tf);
        let display = capitalize(folder_basename(tf));
        let preview = head
            .map(|r| truncate(&r.summary, 80))
            .filter(|p| !p.is_empty() && p != MISSING_SUMMARY);
        let bullet = if let Some(p) = preview {
            format!("- [[{tf_unix}/index|{display}]] ({n}) — {p}\n")
        } else {
            format!("- [[{tf_unix}/index|{display}]] ({n})\n")
        };
        bullets.push_str(&bullet);
    }

    let mut page = String::new();
    for fixed in ["---\n", "type: index\n", "scope: layer\n"] {
        page.push_str(fixed);
    }
    page.push_str(&format!("folder: {layer_dir}\n"));
    if let Some(ts) = latest {
        page.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
    }
    page.push_str("---\n\n");
    page.push_str(&format!("# {layer_dir}\n\n"));
    page.push_str(&bullets);
    page
}
/// Renders the root page for a root-level [`Index`].
///
/// The header's `updated` is taken from the newest record of each counted
/// folder's `index.jsonl` (unreadable catalogs are ignored). The body lists,
/// per layer, every counted folder with its document count.
fn render_root_md_with_store(store: &Store, idx: &Index) -> String {
    let mut latest: Option<DateTime<FixedOffset>> = None;
    for tf in idx.child_counts.keys() {
        let catalog = store.root.join(tf).join("index.jsonl");
        let recs = read_jsonl_records(&catalog).unwrap_or_default();
        if let Some(u) = recs.first().and_then(|r| r.updated) {
            if latest.is_none_or(|cur| cur < u) {
                latest = Some(u);
            }
        }
    }

    let mut page = String::new();
    for fixed in ["---\n", "type: index\n", "scope: root\n"] {
        page.push_str(fixed);
    }
    if let Some(ts) = latest {
        page.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
    }
    page.push_str("---\n\n");
    page.push_str(&format!("# {ROOT_TITLE}\n"));

    for layer in Layer::all() {
        let layer_dir = layer_dir_name(layer);
        let prefix = format!("{layer_dir}/");
        let mut total = 0usize;
        let mut bullets = String::new();
        for (tf, n) in &idx.child_counts {
            let tf_unix = path_to_unix(tf);
            if !tf_unix.starts_with(&prefix) {
                continue;
            }
            total += *n;
            let display = capitalize(folder_basename(tf));
            bullets.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
        }
        if bullets.is_empty() {
            continue;
        }
        page.push('\n');
        page.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
        page.push_str(&bullets);
    }
    page
}
/// Formats one record as a markdown bullet: a wiki link to the document, its
/// summary on a single line, and — when the record has tags — the tags as
/// space-separated `#tag` tokens after a `·` separator. No trailing newline.
fn format_md_entry(rec: &IndexRecord) -> String {
    let path = wiki_target(&rec.path);
    let summary = collapse_whitespace(&rec.summary);
    let mut line = format!("- [[{path}]] — {summary}");
    if rec.tags.is_empty() {
        return line;
    }
    let mut tags = String::new();
    for (position, t) in rec.tags.iter().enumerate() {
        if position > 0 {
            tags.push(' ');
        }
        tags.push_str(&format!("#{t}"));
    }
    line.push_str(&format!(" · {tags}"));
    line
}
/// Builds the "More" footer appended to a type-folder page whose record list
/// was cut off at `MD_CAP`.
///
/// `total` is the full number of records; `type_` and `layer` are inserted
/// into the suggested `dbmd index query` command line.
fn more_footer(total: usize, type_: &str, layer: &str) -> String {
format!(
"## More\n\nThis folder has {total} files. The {MD_CAP} most recent are listed above.\nUse `dbmd index query --type {type_} --in {layer}` for the complete catalog.\n"
)
}
/// Sorts records in place into recency order (see `record_recency_cmp`).
fn sort_records(records: &mut [IndexRecord]) {
    <[IndexRecord]>::sort_by(records, record_recency_cmp);
}
impl IndexRecord {
    /// Builds the record that the index is expected to hold for the document
    /// at `abs`, stored under the store-relative path `rel`.
    ///
    /// Crate-visible wrapper around `record_from_file`.
    pub(crate) fn expected_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
        let expected = record_from_file(abs, rel)?;
        Ok(expected)
    }
}
fn record_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
let mut meta = read_frontmatter(abs)?;
if rel.starts_with("records") {
meta.fields
.entry("meta-type".to_string())
.or_insert_with(|| Value::String("fact".to_string()));
}
Ok(IndexRecord {
path: rel,
type_: meta.type_.unwrap_or_default(),
summary: meta.summary.unwrap_or_else(|| MISSING_SUMMARY.to_string()),
tags: meta.tags,
links: meta.links,
created: meta.created,
updated: meta.updated,
fields: meta.fields,
})
}
/// Frontmatter values extracted from one document, before they are turned
/// into an [`IndexRecord`].
struct FileMeta {
// Value of the `type` key, when it is a string, number or bool.
type_: Option<String>,
// Value of the `summary` key, when it is a string, number or bool.
summary: Option<String>,
// Entries of the `tags` key.
tags: Vec<String>,
// Entries of the `links` key.
links: Vec<String>,
// `created` parsed as an RFC 3339 timestamp, when possible.
created: Option<DateTime<FixedOffset>>,
// `updated` parsed as an RFC 3339 timestamp, when possible.
updated: Option<DateTime<FixedOffset>>,
// All remaining string-keyed entries except `path`, converted to JSON values.
fields: BTreeMap<String, Value>,
}
fn read_frontmatter(abs: &Path) -> crate::Result<FileMeta> {
let bytes = fs::read(abs)?;
let yaml = extract_frontmatter_block_lossy(&bytes).unwrap_or_default();
let map: serde_norway::Mapping = if yaml.trim().is_empty() {
serde_norway::Mapping::new()
} else {
serde_norway::from_str(&yaml).map_err(|e| {
crate::Error::Store(crate::store::StoreError::BadTypeIndex {
path: abs.to_path_buf(),
message: format!("frontmatter YAML: {e}"),
})
})?
};
let mut type_ = None;
let mut summary = None;
let mut tags = Vec::new();
let mut links = Vec::new();
let mut created = None;
let mut updated = None;
let mut fields = BTreeMap::new();
for (k, v) in map {
let key = match k.as_str() {
Some(s) => s.to_string(),
None => continue,
};
match key.as_str() {
"type" => type_ = scalar_string(&v),
"summary" => summary = scalar_string(&v),
"tags" => tags = yaml_string_list(&v),
"links" => links = yaml_string_list(&v),
"created" => created = v.as_str().and_then(parse_ts),
"updated" => updated = v.as_str().and_then(parse_ts),
"path" => {}
_ => {
fields.insert(key, yaml_to_json_value(&v));
}
}
}
Ok(FileMeta {
type_,
summary,
tags,
links,
created,
updated,
fields,
})
}
fn scalar_string(v: &serde_norway::Value) -> Option<String> {
match v {
serde_norway::Value::String(s) => Some(s.clone()),
serde_norway::Value::Number(n) => Some(n.to_string()),
serde_norway::Value::Bool(b) => Some(b.to_string()),
_ => None,
}
}
/// Like `extract_frontmatter_block`, but accepts raw bytes; invalid UTF-8
/// sequences are replaced before the block is extracted.
fn extract_frontmatter_block_lossy(bytes: &[u8]) -> Option<String> {
    let decoded = String::from_utf8_lossy(bytes);
    extract_frontmatter_block(decoded.as_ref())
}
/// Extracts the text between the opening and closing `---` fence lines at the
/// start of `text`.
///
/// A leading byte-order mark is ignored and trailing whitespace on fence
/// lines is tolerated. Each content line is returned terminated by `\n`.
/// Returns `None` when the first line is not a fence or the closing fence is
/// missing.
fn extract_frontmatter_block(text: &str) -> Option<String> {
    let is_fence = |line: &str| line.trim_end() == "---";

    let body = text.strip_prefix('\u{feff}').unwrap_or(text);
    let mut remaining = body.lines();
    if !is_fence(remaining.next()?) {
        return None;
    }

    let mut block = String::new();
    loop {
        // Running out of lines before the closing fence means there is no block.
        let line = remaining.next()?;
        if is_fence(line) {
            return Some(block);
        }
        block.push_str(line);
        block.push('\n');
    }
}
fn yaml_string_list(v: &serde_norway::Value) -> Vec<String> {
match v {
serde_norway::Value::String(s) => vec![s.clone()],
serde_norway::Value::Sequence(seq) => seq
.iter()
.filter_map(yaml_string_or_wiki_link_literal)
.collect(),
_ => Vec::new(),
}
}
/// Returns the value as text when it is a YAML string, or as `[[target]]`
/// when it is an unquoted wiki-link literal; otherwise `None`.
fn yaml_string_or_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
    match v.as_str() {
        Some(text) => Some(text.to_string()),
        None => unquoted_wiki_link_literal(v),
    }
}
fn yaml_to_json_value(v: &serde_norway::Value) -> Value {
if let Some(link) = unquoted_wiki_link_literal(v) {
return Value::String(link);
}
match v {
serde_norway::Value::String(s) => Value::String(s.clone()),
serde_norway::Value::Bool(b) => Value::Bool(*b),
serde_norway::Value::Number(n) => {
serde_json::to_value(n).unwrap_or_else(|_| Value::String(n.to_string()))
}
serde_norway::Value::Sequence(seq) => {
Value::Array(seq.iter().map(yaml_to_json_value).collect())
}
serde_norway::Value::Mapping(_) | serde_norway::Value::Tagged(_) => {
serde_json::to_value(v).unwrap_or(Value::Null)
}
serde_norway::Value::Null => Value::Null,
}
}
fn unquoted_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
let serde_norway::Value::Sequence(outer) = v else {
return None;
};
if outer.len() != 1 {
return None;
}
let serde_norway::Value::Sequence(inner) = &outer[0] else {
return None;
};
let [serde_norway::Value::String(target)] = inner.as_slice() else {
return None;
};
Some(format!("[[{target}]]"))
}
/// Parses an RFC 3339 timestamp, ignoring surrounding whitespace; returns
/// `None` when the text is not a valid timestamp.
fn parse_ts(s: &str) -> Option<DateTime<FixedOffset>> {
    let trimmed = s.trim();
    match DateTime::parse_from_rfc3339(trimmed) {
        Ok(ts) => Some(ts),
        Err(_) => None,
    }
}
/// Formats a timestamp as RFC 3339 with `SecondsFormat::AutoSi` and the
/// `use_z` flag enabled.
fn fmt_ts(ts: &DateTime<FixedOffset>) -> String {
    let use_z = true;
    ts.to_rfc3339_opts(SecondsFormat::AutoSi, use_z)
}
/// Returns the latest timestamp among the `Some` items of `it`, or `None`
/// when there are none. On ties the earliest-seen value is kept.
fn max_updated<'a>(
    it: impl Iterator<Item = Option<&'a DateTime<FixedOffset>>>,
) -> Option<DateTime<FixedOffset>> {
    it.flatten()
        .fold(None, |best: Option<DateTime<FixedOffset>>, ts| match best {
            Some(cur) if cur >= *ts => Some(cur),
            _ => Some(*ts),
        })
}
fn read_jsonl_records(jsonl: &Path) -> crate::Result<Vec<IndexRecord>> {
let text = match fs::read_to_string(jsonl) {
Ok(t) => t,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
Err(e) => return Err(e.into()),
};
let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
for (i, line) in text.lines().enumerate() {
if line.trim().is_empty() {
continue;
}
let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
crate::Error::Store(crate::store::StoreError::BadTypeIndex {
path: jsonl.to_path_buf(),
message: format!("line {}: {e}", i + 1),
})
})?;
by_path.insert(rec.path.clone(), rec);
}
let mut records: Vec<IndexRecord> = by_path.into_values().collect();
sort_records(&mut records);
Ok(records)
}
/// Summary of one type folder's `index.jsonl`, used to render parent pages.
#[derive(Debug, Clone, Default, PartialEq)]
struct FolderStat {
// Number of distinct record paths in the catalog.
count: usize,
// The record that sorts first in recency order, if the catalog has any.
newest: Option<IndexRecord>,
}
fn read_folder_stat(jsonl: &Path) -> crate::Result<FolderStat> {
let text = match fs::read_to_string(jsonl) {
Ok(t) => t,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(FolderStat::default()),
Err(e) => return Err(e.into()),
};
let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
for (i, line) in text.lines().enumerate() {
if line.trim().is_empty() {
continue;
}
let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
crate::Error::Store(crate::store::StoreError::BadTypeIndex {
path: jsonl.to_path_buf(),
message: format!("line {}: {e}", i + 1),
})
})?;
by_path.insert(rec.path.clone(), rec);
}
let count = by_path.len();
let newest = by_path.into_values().min_by(record_recency_cmp);
Ok(FolderStat { count, newest })
}
/// Ordering used for all record lists: most recently updated first, records
/// without an `updated` timestamp last, ties broken by ascending path.
fn record_recency_cmp(a: &IndexRecord, b: &IndexRecord) -> std::cmp::Ordering {
    use std::cmp::Ordering;
    let by_recency = match (a.updated, b.updated) {
        // Reversed comparison: the later timestamp sorts first.
        (Some(left), Some(right)) => right.cmp(&left),
        (Some(_), None) => Ordering::Less,
        (None, Some(_)) => Ordering::Greater,
        (None, None) => Ordering::Equal,
    };
    by_recency.then_with(|| a.path.cmp(&b.path))
}
/// Collects a [`FolderStat`] for every type folder of the given layers whose
/// `index.jsonl` holds at least one record.
///
/// # Errors
/// Returns the first error hit while reading a folder's catalog.
fn collect_child_stats(
    store: &Store,
    layers: &[Layer],
) -> crate::Result<BTreeMap<PathBuf, FolderStat>> {
    let mut stats = BTreeMap::new();
    let folders = layers
        .iter()
        .flat_map(|&layer| type_folders_in_layer(store, layer));
    for folder in folders {
        let catalog = store.root.join(&folder).join("index.jsonl");
        let stat = read_folder_stat(&catalog)?;
        if stat.count != 0 {
            stats.insert(folder, stat);
        }
    }
    Ok(stats)
}
/// Lists the document files beneath a type folder.
///
/// Walks the folder recursively, skipping hidden entries (and everything
/// below hidden directories) as well as entries that cannot be read. Only
/// regular files with the `md` extension are returned, excluding files named
/// `index.md`. A path that is not a directory yields an empty list.
fn walk_type_folder_files(folder_abs: &Path) -> Vec<PathBuf> {
    if !folder_abs.is_dir() {
        return Vec::new();
    }
    walkdir::WalkDir::new(folder_abs)
        .into_iter()
        .filter_entry(|entry| !is_hidden(entry.file_name()))
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file())
        .map(|entry| entry.into_path())
        .filter(|doc| {
            let is_markdown = doc.extension().and_then(|ext| ext.to_str()) == Some("md");
            let is_index_page = doc.file_name().and_then(|n| n.to_str()) == Some("index.md");
            is_markdown && !is_index_page
        })
        .collect()
}
/// Lists the type folders of a layer as sorted `<layer dir>/<name>` paths.
///
/// Only directories count; hidden directories, a directory named `log`, and
/// names that are not valid UTF-8 are skipped. An unreadable or missing layer
/// directory yields an empty list.
fn type_folders_in_layer(store: &Store, layer: Layer) -> Vec<PathBuf> {
    let layer_name = layer_dir_name(layer);
    let Ok(listing) = fs::read_dir(store.root.join(layer_name)) else {
        return Vec::new();
    };
    let mut folders: Vec<PathBuf> = listing
        .flatten()
        .filter(|entry| entry.path().is_dir())
        .filter_map(|entry| {
            let os_name = entry.file_name();
            let name = os_name.to_str()?;
            if is_hidden(&os_name) || name == "log" {
                return None;
            }
            Some(PathBuf::from(layer_name).join(name))
        })
        .collect();
    folders.sort();
    folders
}
/// Returns the `<layer>/<type-folder>` prefix of a store-relative file path.
///
/// Yields `None` when the first segment is not a known layer directory, when
/// a segment is not valid UTF-8, or when the path has fewer than three
/// segments. A file sitting directly inside a layer directory (for example
/// `sources/note.md`) is therefore *not* treated as being inside a type
/// folder named after itself.
fn type_folder_of(file_rel: &Path) -> Option<PathBuf> {
    let mut comps = file_rel.components();
    let layer = comps.next()?.as_os_str().to_str()?;
    layer_from_dir_name(layer)?;
    let type_seg = comps.next()?.as_os_str().to_str()?;
    // There must be something below the type folder, otherwise `type_seg` is
    // the file itself rather than a folder.
    comps.next()?;
    Some(PathBuf::from(layer).join(type_seg))
}
/// Returns `abs` relative to `root`, or `None` when `abs` is not located
/// under `root`.
fn rel_to_store(root: &Path, abs: &Path) -> Option<PathBuf> {
    match abs.strip_prefix(root) {
        Ok(tail) => Some(tail.to_path_buf()),
        Err(_) => None,
    }
}
/// Normalizes a relative path: components are re-joined with `/` and a
/// leading `./` is dropped.
fn normalize_rel(p: &Path) -> PathBuf {
    let unix = path_to_unix(p);
    match unix.strip_prefix("./") {
        Some(rest) => PathBuf::from(rest),
        None => PathBuf::from(unix.as_str()),
    }
}
/// Returns `true` when the path's file name is `index.md` or `index.jsonl`.
fn is_index_artifact(p: &Path) -> bool {
    let Some(name) = p.file_name().and_then(|n| n.to_str()) else {
        return false;
    };
    name == "index.md" || name == "index.jsonl"
}
/// Decides whether an index artifact may be deleted by cleanup.
///
/// `index.jsonl` is always deletable. `index.md` is deletable when its
/// frontmatter cannot be read, has no `type`, or has `type: index`; an
/// `index.md` with any other type is kept. Other file names are never
/// deletable.
fn is_deletable_catalog_artifact(p: &Path) -> bool {
    let name = p.file_name().and_then(|n| n.to_str());
    if name == Some("index.jsonl") {
        return true;
    }
    if name != Some("index.md") {
        return false;
    }
    match read_frontmatter(p) {
        Err(_) => true,
        Ok(meta) => meta.type_.as_deref().is_none_or(|t| t == "index"),
    }
}
/// Returns `true` when the name is valid UTF-8 and starts with a dot.
fn is_hidden(name: &std::ffi::OsStr) -> bool {
    matches!(name.to_str(), Some(text) if text.starts_with('.'))
}
/// Returns the directory name that holds the given layer.
fn layer_dir_name(layer: Layer) -> &'static str {
    match layer {
        Layer::Records => "records",
        Layer::Sources => "sources",
    }
}
/// Maps a directory name back to its layer; unknown names yield `None`.
fn layer_from_dir_name(name: &str) -> Option<Layer> {
    let layer = match name {
        "records" => Layer::Records,
        "sources" => Layer::Sources,
        _ => return None,
    };
    Some(layer)
}
/// Returns the final path component as text, or `""` when the path has no
/// file name or the name is not valid UTF-8.
fn folder_basename(p: &Path) -> &str {
    match p.file_name() {
        Some(name) => name.to_str().unwrap_or(""),
        None => "",
    }
}
/// Returns the wiki-link target for a document path: the `/`-joined path
/// with a trailing `.md` removed.
fn wiki_target(p: &Path) -> String {
    let mut target = path_to_unix(p);
    if target.ends_with(".md") {
        target.truncate(target.len() - ".md".len());
    }
    target
}
/// Joins the components of a path with `/`, converting each component to
/// text lossily.
fn path_to_unix(p: &Path) -> String {
    let mut joined = String::new();
    for (position, component) in p.components().enumerate() {
        if position > 0 {
            joined.push('/');
        }
        joined.push_str(&component.as_os_str().to_string_lossy());
    }
    joined
}
/// Serde adapter that stores a path as a `/`-joined string.
mod path_serde {
    use super::path_to_unix;
    use serde::{Deserialize, Deserializer, Serializer};
    use std::path::{Path, PathBuf};

    /// Serializes the path as its `/`-joined string form.
    pub fn serialize<S>(p: &Path, s: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let unix = path_to_unix(p);
        s.serialize_str(&unix)
    }

    /// Deserializes a string and wraps it in a `PathBuf` unchanged.
    pub fn deserialize<'de, D>(d: D) -> Result<PathBuf, D::Error>
    where
        D: Deserializer<'de>,
    {
        String::deserialize(d).map(PathBuf::from)
    }
}
/// Upper-cases the first character of `s` (which may expand to several
/// characters) and leaves the rest unchanged.
fn capitalize(s: &str) -> String {
    let Some(first) = s.chars().next() else {
        return String::new();
    };
    let mut out: String = first.to_uppercase().collect();
    out.push_str(&s[first.len_utf8()..]);
    out
}
/// Replaces every run of whitespace with a single space and drops leading
/// and trailing whitespace.
fn collapse_whitespace(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(word);
    }
    out
}
/// Collapses whitespace in `s` and keeps at most the first `max` characters
/// of the result. Nothing is appended to mark the cut.
fn truncate(s: &str, max: usize) -> String {
    let mut one_line = collapse_whitespace(s);
    // The byte offset of character number `max` (if any) is where to cut.
    if let Some((cut, _)) = one_line.char_indices().nth(max) {
        one_line.truncate(cut);
    }
    one_line
}
/// Writes `contents` to `path` by staging it in a temporary file in the same
/// directory and renaming that file over the destination.
///
/// The parent directory is created when missing.
fn write_atomic(path: &Path, contents: String) -> crate::Result<()> {
    let parent = path.parent();
    if let Some(dir) = parent {
        fs::create_dir_all(dir)?;
    }
    let staging_dir = parent.unwrap_or_else(|| Path::new("."));
    let mut staged = tempfile_in(staging_dir)?;
    staged.write_all(contents.as_bytes())?;
    staged.flush()?;
    staged.persist(path)?;
    Ok(())
}
/// Deletes the file at `path`; a file that does not exist is not an error.
fn remove_if_exists(path: &Path) -> crate::Result<()> {
    if let Err(e) = fs::remove_file(path) {
        if e.kind() != std::io::ErrorKind::NotFound {
            return Err(e.into());
        }
    }
    Ok(())
}
fn bad_index(path: &Path, msg: &str) -> crate::Error {
crate::Error::Store(crate::store::StoreError::BadTypeIndex {
path: path.to_path_buf(),
message: msg.to_string(),
})
}
/// Best-effort lock on a folder, represented by a `.index.lock` file inside
/// it. The lock file is removed when the value is dropped, provided the lock
/// was actually obtained.
struct FolderLock {
    // Location of the lock file.
    path: PathBuf,
    // Whether this value created the lock file and must remove it on drop.
    held: bool,
}

impl FolderLock {
    /// Tries to take the lock for `folder_abs`, creating the folder if needed.
    ///
    /// The lock is taken by creating the lock file exclusively. While the
    /// file already exists the call retries after a short sleep; a lock file
    /// whose modification time is older than `STALE_AFTER` is removed and the
    /// attempt repeated immediately. After `MAX_ATTEMPTS` tries, or on any
    /// other I/O error, the call gives up and returns a value with
    /// `held == false` instead of failing.
    fn acquire(folder_abs: &Path) -> Self {
        use std::io::ErrorKind;
        use std::time::{Duration, SystemTime};
        const MAX_ATTEMPTS: u32 = 600;
        const SPIN: Duration = Duration::from_millis(10);
        const STALE_AFTER: Duration = Duration::from_secs(30);

        let path = folder_abs.join(".index.lock");
        let _ = fs::create_dir_all(folder_abs);

        let mut attempts_left = MAX_ATTEMPTS;
        while attempts_left > 0 {
            attempts_left -= 1;

            let failure = match fs::OpenOptions::new()
                .write(true)
                .create_new(true)
                .open(&path)
            {
                Ok(_) => return FolderLock { path, held: true },
                Err(e) => e,
            };
            if failure.kind() != ErrorKind::AlreadyExists {
                return FolderLock { path, held: false };
            }

            // Someone else holds the lock: break it if it looks abandoned.
            let abandoned = fs::metadata(&path)
                .and_then(|meta| meta.modified())
                .ok()
                .and_then(|modified| SystemTime::now().duration_since(modified).ok())
                .is_some_and(|age| age > STALE_AFTER);
            if abandoned {
                let _ = fs::remove_file(&path);
                continue;
            }
            std::thread::sleep(SPIN);
        }
        FolderLock { path, held: false }
    }
}

impl Drop for FolderLock {
    /// Releases the lock by deleting the lock file, if this value holds it.
    fn drop(&mut self) {
        if !self.held {
            return;
        }
        let _ = fs::remove_file(&self.path);
    }
}
/// Locks one or two type folders for a rename.
///
/// Identical folders are locked once. Distinct folders are always locked in
/// ascending path order, regardless of argument order.
fn lock_folders(store: &Store, a: &Path, b: &Path) -> Vec<FolderLock> {
    use std::cmp::Ordering;
    let in_lock_order: Vec<&Path> = match a.cmp(b) {
        Ordering::Equal => vec![a],
        Ordering::Less => vec![a, b],
        Ordering::Greater => vec![b, a],
    };
    in_lock_order
        .into_iter()
        .map(|folder| FolderLock::acquire(&store.root.join(folder)))
        .collect()
}
/// A temporary file that is either renamed onto its destination with
/// `persist` or deleted when dropped.
struct AtomicTemp {
    // Open handle; taken (and thereby closed) by `persist`.
    file: Option<fs::File>,
    // Location of the temporary file.
    path: PathBuf,
    // Set once the rename succeeded, so `drop` leaves the file alone.
    persisted: bool,
}

impl AtomicTemp {
    /// Returns the open file handle.
    ///
    /// # Panics
    /// Panics when the handle has already been taken.
    fn handle(&mut self) -> &mut fs::File {
        self.file.as_mut().expect("temp file open")
    }

    /// Writes all of `bytes` to the temporary file.
    fn write_all(&mut self, bytes: &[u8]) -> std::io::Result<()> {
        self.handle().write_all(bytes)
    }

    /// Flushes the temporary file.
    fn flush(&mut self) -> std::io::Result<()> {
        self.handle().flush()
    }

    /// Closes the file (after a sync whose result is ignored) and renames it
    /// to `dest`. If the rename fails the temporary file is removed on drop.
    fn persist(mut self, dest: &Path) -> std::io::Result<()> {
        if let Some(open) = self.file.take() {
            let _ = open.sync_all();
        }
        fs::rename(&self.path, dest)?;
        self.persisted = true;
        Ok(())
    }
}

impl Drop for AtomicTemp {
    /// Deletes the temporary file unless it was persisted.
    fn drop(&mut self) {
        if self.persisted {
            return;
        }
        let _ = fs::remove_file(&self.path);
    }
}
/// Creates a new, exclusively opened temporary file in `dir`.
///
/// The file name combines the process id, the current time in nanoseconds
/// since the Unix epoch (0 when the clock is before the epoch) and a
/// per-process counter.
fn tempfile_in(dir: &Path) -> std::io::Result<AtomicTemp> {
    use std::time::{SystemTime, UNIX_EPOCH};
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_nanos(),
        Err(_) => 0,
    };
    let pid = std::process::id();
    let counter = next_temp_counter();
    let path = dir.join(format!(".dbmd-index-{pid}-{nanos}-{counter}.tmp"));

    let mut options = fs::OpenOptions::new();
    options.write(true).create_new(true);
    let file = options.open(&path)?;

    Ok(AtomicTemp {
        file: Some(file),
        path,
        persisted: false,
    })
}
/// Returns the next value of a process-wide counter, starting at 0 and
/// increasing by one per call.
fn next_temp_counter() -> u64 {
    static SEQUENCE: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
    SEQUENCE.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
}
#[cfg(test)]
mod tests {
use super::*;
use std::collections::BTreeSet;
use std::fs;
use tempfile::TempDir;
/// Builds a throwaway store in a fresh temp directory containing only a
/// `DB.md` marker. The `TempDir` is returned alongside the store so the
/// caller keeps the directory alive for the duration of the test.
fn mk_store() -> (TempDir, Store) {
    let tmp = TempDir::new().unwrap();
    let root = tmp.path().to_path_buf();
    fs::write(root.join("DB.md"), "# test store\n").unwrap();
    let store = Store {
        root,
        config: crate::parser::Config::default(),
    };
    (tmp, store)
}
/// Writes a file at store-relative `rel` made of the frontmatter `fm` wrapped
/// in `---` fences followed by `body`, creating parent directories as needed.
fn write_raw(store: &Store, rel: &str, fm: &str, body: &str) {
    let target = store.root.join(rel);
    let parent = target.parent().unwrap();
    fs::create_dir_all(parent).unwrap();
    let contents = format!("---\n{fm}\n---\n{body}");
    fs::write(&target, contents).unwrap();
}
/// Writes a document whose frontmatter holds `type`, then the optional
/// `summary` and `updated` lines, then `extra_yaml` verbatim. Trailing
/// whitespace of the assembled frontmatter is trimmed before writing.
fn write_doc(
    store: &Store,
    rel: &str,
    type_: &str,
    summary: Option<&str>,
    updated: Option<&str>,
    extra_yaml: &str,
) {
    let summary_line = summary
        .map(|s| format!("summary: {s}\n"))
        .unwrap_or_default();
    let updated_line = updated
        .map(|u| format!("updated: {u}\n"))
        .unwrap_or_default();
    let fm = format!("type: {type_}\n{summary_line}{updated_line}{extra_yaml}");
    write_raw(store, rel, fm.trim_end(), "\nbody text\n");
}
/// Reads the store-relative file `rel` as a string, panicking on failure.
fn read(store: &Store, rel: &str) -> String {
    let abs = store.root.join(rel);
    fs::read_to_string(abs).unwrap()
}
/// Reports whether the store-relative path `rel` exists on disk.
fn exists(store: &Store, rel: &str) -> bool {
    let abs = store.root.join(rel);
    abs.exists()
}
/// Collects every file under the store root that `is_index_artifact` accepts
/// into a map from its unix-style store-relative path to its contents.
/// Directory entries that fail to read are skipped.
fn snapshot_artifacts(store: &Store) -> BTreeMap<String, String> {
    walkdir::WalkDir::new(&store.root)
        .into_iter()
        .filter_map(|e| e.ok())
        .filter(|entry| is_index_artifact(entry.path()))
        .map(|entry| {
            let p = entry.path();
            let rel = path_to_unix(&rel_to_store(&store.root, p).unwrap());
            (rel, fs::read_to_string(p).unwrap())
        })
        .collect()
}
/// A type-folder index gathers files from every shard below it and orders
/// them by `updated`, newest first.
#[test]
fn type_folder_aggregates_across_shards_in_recency_order() {
    let (_d, store) = mk_store();
    // Written out of recency order and spread over two month shards.
    let fixtures = [
        ("sources/emails/2026/05/b-old.md", "Older mail", "2026-05-01T09:00:00Z"),
        ("sources/emails/2026/06/c-new.md", "Newest mail", "2026-06-15T12:00:00Z"),
        ("sources/emails/2026/05/a-mid.md", "Middle mail", "2026-05-20T08:00:00Z"),
    ];
    for (rel, summary, updated) in fixtures {
        write_doc(&store, rel, "email", Some(summary), Some(updated), "");
    }
    let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
    let paths: Vec<String> = idx
        .records
        .iter()
        .map(|record| path_to_unix(&record.path))
        .collect();
    let expected = vec![
        "sources/emails/2026/06/c-new.md",
        "sources/emails/2026/05/a-mid.md",
        "sources/emails/2026/05/b-old.md",
    ];
    assert_eq!(
        paths, expected,
        "records must aggregate across shards, newest `updated` first"
    );
}
/// Checks the markdown rendering of a type-folder index: the frontmatter and
/// heading, a tagged entry, an untagged entry without a tag separator, and no
/// "More" footer while under the cap.
#[test]
fn type_folder_md_format_entries_tags_and_derived_updated() {
    let (_d, store) = mk_store();
    let contacts = [
        (
            "records/contacts/sarah-chen.md",
            "Renewal champion at Acme",
            "2026-05-27T10:00:00Z",
            "tags:\n - renewal\n - acme\n",
        ),
        (
            "records/contacts/no-tags.md",
            "Plain contact",
            "2026-05-26T10:00:00Z",
            "",
        ),
    ];
    for (rel, summary, updated, extra) in contacts {
        write_doc(&store, rel, "contact", Some(summary), Some(updated), extra);
    }
    let idx = Index::build_type_folder(&store, Path::new("records/contacts")).unwrap();
    let md = idx.to_markdown();

    let header = "---\ntype: index\nscope: type-folder\nfolder: records/contacts\nupdated: 2026-05-27T10:00:00Z\n---\n\n# records/contacts\n";
    assert!(md.starts_with(header), "frontmatter/heading wrong:\n{md}");

    let tagged = "- [[records/contacts/sarah-chen]] — Renewal champion at Acme · #renewal #acme\n";
    assert!(md.contains(tagged), "tagged entry wrong:\n{md}");

    let untagged = "- [[records/contacts/no-tags]] — Plain contact\n";
    assert!(md.contains(untagged), "untagged entry wrong:\n{md}");

    assert!(
        !md.contains("Plain contact ·"),
        "untagged entry must not emit a tag separator"
    );
    assert!(!md.contains("## More"), "no footer expected under the cap");
}
/// A file without a `summary` gets the `MISSING_SUMMARY` placeholder, both in
/// the record and in the rendered markdown entry.
#[test]
fn missing_summary_becomes_placeholder_not_invented() {
    let (_d, store) = mk_store();
    let no_summary: Option<&str> = None;
    write_doc(
        &store,
        "records/notes/x.md",
        "note",
        no_summary,
        Some("2026-05-27T10:00:00Z"),
        "",
    );
    let idx = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
    let first = &idx.records[0];
    assert_eq!(first.summary, MISSING_SUMMARY);
    let md = idx.to_markdown();
    let placeholder_entry = "- [[records/notes/x]] — (no summary)\n";
    assert!(
        md.contains(placeholder_entry),
        "missing summary must render the placeholder, not invent text:\n{md}"
    );
}
/// The JSONL form of a type-folder index holds one object per file, newest
/// first, and each line deserializes back into the matching `IndexRecord`.
/// Also pins the typed fields, the extra frontmatter keys collected in
/// `fields`, and the exact key order of a serialized line.
#[test]
fn jsonl_is_complete_structured_and_round_trips() {
let (_d, store) = mk_store();
// e1 carries every reserved key plus several extra frontmatter keys.
write_doc(
&store,
"records/expenses/2026/05/e1.md",
"expense",
Some("Lunch with vendor"),
Some("2026-05-10T10:00:00Z"),
"created: 2026-05-10T09:00:00Z\nstatus: paid\namount: 42\ncompany: [[records/companies/acme]]\nrelated:\n - [[wiki/themes/spend]]\ntags:\n - food\nlinks:\n - wiki/themes/spend\n - [[wiki/themes/renewal]]\n",
);
// e2 is newer, so it is expected on the first line.
write_doc(
&store,
"records/expenses/2026/06/e2.md",
"expense",
Some("Cloud bill"),
Some("2026-06-01T10:00:00Z"),
"amount: 100\n",
);
let idx = Index::build_type_folder(&store, Path::new("records/expenses")).unwrap();
let jsonl = idx.to_jsonl();
let lines: Vec<&str> = jsonl.lines().collect();
assert_eq!(lines.len(), 2, "one JSON object per file, uncapped");
let r0: IndexRecord = serde_json::from_str(lines[0]).unwrap();
assert_eq!(path_to_unix(&r0.path), "records/expenses/2026/06/e2.md");
assert_eq!(
r0, idx.records[0],
"jsonl line must round-trip to the record"
);
let r1: IndexRecord = serde_json::from_str(lines[1]).unwrap();
assert_eq!(r1.type_, "expense");
assert_eq!(r1.summary, "Lunch with vendor");
assert_eq!(r1.tags, vec!["food".to_string()]);
// Link values are expected verbatim, with or without the [[...]] wrapper.
assert_eq!(
r1.links,
vec![
"wiki/themes/spend".to_string(),
"[[wiki/themes/renewal]]".to_string()
]
);
assert_eq!(
r1.created,
Some(DateTime::parse_from_rfc3339("2026-05-10T09:00:00Z").unwrap())
);
assert_eq!(r1.fields.get("status"), Some(&Value::from("paid")));
assert_eq!(r1.fields.get("amount"), Some(&Value::from(42)));
assert_eq!(
r1.fields.get("company"),
Some(&Value::from("[[records/companies/acme]]"))
);
assert_eq!(
r1.fields.get("related"),
Some(&serde_json::json!(["[[wiki/themes/spend]]"]))
);
// Keys with a dedicated struct field must not also show up in `fields`.
for reserved in [
"path", "type", "summary", "tags", "links", "created", "updated",
] {
assert!(
!r1.fields.contains_key(reserved),
"reserved key {reserved} must not appear in fields"
);
}
// Reserved keys are expected first, in this exact order.
assert!(
lines[1].starts_with(
r#"{"path":"records/expenses/2026/05/e1.md","type":"expense","summary":"Lunch with vendor","tags":["food"],"links":["wiki/themes/spend","[[wiki/themes/renewal]]"],"created":"2026-05-10T09:00:00Z","updated":"2026-05-10T10:00:00Z","#
),
"jsonl key order not stable:\n{}",
lines[1]
);
// Extras follow in sorted key order.
// NOTE(review): the expected tail contains a `meta-type` entry that the
// fixture frontmatter never sets — presumably added while the record is
// projected from the file; confirm against `record_from_file`.
assert!(
lines[1].ends_with(r#""amount":42,"company":"[[records/companies/acme]]","meta-type":"fact","related":["[[wiki/themes/spend]]"],"status":"paid"}"#),
"extras must be sorted:\n{}",
lines[1]
);
}
/// With more than `MD_CAP` files the markdown view lists exactly `MD_CAP`
/// entries plus a "More" footer, while the records and the JSONL keep every
/// file.
#[test]
fn over_cap_md_shows_500_plus_footer_jsonl_holds_all() {
    let (_d, store) = mk_store();
    let total = MD_CAP + 7;
    (0..total).for_each(|i| {
        let day = 1 + (i % 27);
        let rel = format!("sources/emails/2026/05/m-{i:04}.md");
        let updated = format!("2026-05-{day:02}T00:00:{:02}Z", i % 60);
        let summary = format!("mail {i}");
        write_doc(&store, &rel, "email", Some(&summary), Some(&updated), "");
    });
    let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
    assert_eq!(idx.records.len(), total, "jsonl/records keep every file");

    let md = idx.to_markdown();
    let entry_lines = md
        .lines()
        .filter(|line| line.starts_with("- [["))
        .count();
    assert_eq!(entry_lines, MD_CAP, "md browse view is capped at 500");
    assert!(
        md.contains("## More\n\n"),
        "over-cap md needs a More footer"
    );
    let footer_count =
        format!("This folder has {total} files. The 500 most recent are listed above.\n");
    assert!(md.contains(&footer_count), "footer count wrong:\n{md}");
    let footer_hint =
        "Use `dbmd index query --type email --in sources` for the complete catalog.\n";
    assert!(
        md.contains(footer_hint),
        "footer must infer type=email layer=sources:\n{md}"
    );

    let jsonl = idx.to_jsonl();
    assert_eq!(jsonl.lines().count(), total, "jsonl is uncapped");
}
/// `sort_records` orders newest first, breaks equal timestamps by path, and
/// places records without `updated` at the end.
#[test]
fn sort_breaks_ties_by_path_and_puts_undated_last() {
    // Two records share this timestamp so the path tie-break is exercised.
    let tied = Some("2026-05-01T00:00:00Z");
    let mut recs = vec![
        rec("z/a.md", tied),
        rec("a/b.md", tied),
        rec("m/c.md", None),
        rec("b/d.md", Some("2026-06-01T00:00:00Z")),
    ];
    sort_records(&mut recs);
    let order: Vec<String> = recs
        .iter()
        .map(|record| path_to_unix(&record.path))
        .collect();
    assert_eq!(order, vec!["b/d.md", "a/b.md", "z/a.md", "m/c.md"]);
}
/// Builds a minimal `IndexRecord` for sorting tests: only `path` and the
/// optional RFC 3339 `updated` stamp vary; everything else is a fixed filler.
fn rec(path: &str, updated: Option<&str>) -> IndexRecord {
    let updated = updated.map(|stamp| DateTime::parse_from_rfc3339(stamp).unwrap());
    IndexRecord {
        path: PathBuf::from(path),
        type_: String::from("t"),
        summary: String::from("s"),
        tags: Vec::new(),
        links: Vec::new(),
        created: None,
        updated,
        fields: BTreeMap::new(),
    }
}
/// The layer index lists its type folders alphabetically, each with a file
/// count and the summary of its newest member, and carries the newest child
/// `updated` stamp in its frontmatter.
#[test]
fn layer_index_lists_type_folders_with_counts_and_preview() {
    let (_d, store) = mk_store();
    for (rel, type_, summary, updated) in [
        ("records/contacts/a.md", "contact", "Contact A older", "2026-05-01T00:00:00Z"),
        ("records/contacts/b.md", "contact", "Contact B newest", "2026-05-09T00:00:00Z"),
        ("records/companies/x.md", "company", "Acme Inc", "2026-05-05T00:00:00Z"),
    ] {
        write_doc(&store, rel, type_, Some(summary), Some(updated), "");
    }
    // Type folders are written first, then the layer that rolls them up.
    for level in [
        IndexLevel::TypeFolder("records/contacts".into()),
        IndexLevel::TypeFolder("records/companies".into()),
        IndexLevel::Layer(Layer::Records),
    ] {
        Index::write_level(&store, &level).unwrap();
    }

    let md = read(&store, "records/index.md");
    assert!(
        md.starts_with("---\ntype: index\nscope: layer\nfolder: records\n"),
        "layer fm:\n{md}"
    );
    let position = |needle: &str| md.find(needle).unwrap();
    assert!(
        position("companies/index") < position("contacts/index"),
        "type folders must be alphabetical"
    );
    let contacts_entry = "- [[records/contacts/index|Contacts]] (2) — Contact B newest\n";
    assert!(md.contains(contacts_entry), "contacts entry:\n{md}");
    let companies_entry = "- [[records/companies/index|Companies]] (1) — Acme Inc\n";
    assert!(md.contains(companies_entry), "companies entry:\n{md}");
    assert!(
        md.contains("updated: 2026-05-09T00:00:00Z\n"),
        "layer updated must be max child:\n{md}"
    );
}
/// The root index groups type folders under per-layer headings with totals,
/// omits empty layers, and lists each type folder with a count but no
/// preview text.
#[test]
fn root_index_groups_layers_with_totals_and_per_type_counts() {
    let (_d, store) = mk_store();
    for (rel, type_, summary, updated) in [
        ("sources/emails/2026/05/a.md", "email", "Mail", "2026-05-01T00:00:00Z"),
        ("sources/docs/d.md", "doc", "Doc", "2026-05-02T00:00:00Z"),
        ("records/contacts/c.md", "contact", "C", "2026-05-03T00:00:00Z"),
    ] {
        write_doc(&store, rel, type_, Some(summary), Some(updated), "");
    }
    Index::rebuild_all(&store).unwrap();

    let md = read(&store, "index.md");
    assert!(
        md.starts_with("---\ntype: index\nscope: root\n"),
        "root fm:\n{md}"
    );
    assert!(md.contains("# Knowledge base index\n"), "root title:\n{md}");

    let sources_h = md
        .find("## Sources (2)")
        .expect("sources heading w/ total 2");
    let records_h = md
        .find("## Records (1)")
        .expect("records heading w/ total 1");
    assert!(sources_h < records_h, "Sources must precede Records");
    assert!(!md.contains("## Wiki"), "empty layer gets no section");

    let docs_entry = "- [[sources/docs/index|Docs]] (1)\n";
    assert!(md.contains(docs_entry), "root docs entry:\n{md}");
    let emails_entry = "- [[sources/emails/index|Emails]] (1)\n";
    assert!(md.contains(emails_entry), "root emails entry:\n{md}");
    let contacts_entry = "- [[records/contacts/index|Contacts]] (1)\n";
    assert!(md.contains(contacts_entry), "root contacts entry:\n{md}");
    assert!(!md.contains("— "), "root entries carry no preview text");
}
/// Indexing files one at a time through `Index::on_write` must leave exactly
/// the same artifacts, byte for byte, as writing the same files and running
/// `Index::rebuild_all` once.
#[test]
fn on_write_matches_rebuild_byte_for_byte() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();
    // (path, type, summary, updated, extra frontmatter)
    let docs = [
        (
            "sources/emails/2026/05/e1.md",
            "email",
            "First mail",
            "2026-05-01T10:00:00Z",
            "tags:\n - inbox\n",
        ),
        (
            "sources/emails/2026/06/e2.md",
            "email",
            "Second mail",
            "2026-06-01T10:00:00Z",
            "",
        ),
        (
            "records/contacts/sarah.md",
            "contact",
            "Sarah",
            "2026-05-15T10:00:00Z",
            "links:\n - wiki/people/sarah\n",
        ),
        (
            "records/contacts/elena.md",
            "contact",
            "Elena",
            "2026-05-20T10:00:00Z",
            "status: active\n",
        ),
        (
            "records/profiles/sarah.md",
            "profile",
            "Sarah bio",
            "2026-05-21T10:00:00Z",
            "",
        ),
    ];
    for (rel, t, sum, upd, extra) in docs {
        for target in [&wt, &rb] {
            write_doc(target, rel, t, Some(sum), Some(upd), extra);
        }
        // Only the write-through store is indexed incrementally.
        Index::on_write(&wt, Path::new(rel)).unwrap();
    }
    Index::rebuild_all(&rb).unwrap();

    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    let keys_a: Vec<_> = a.keys().collect();
    let keys_b: Vec<_> = b.keys().collect();
    assert_eq!(keys_a, keys_b, "same set of index artifacts must exist");
    for (k, v) in &a {
        assert_eq!(v, &b[k], "artifact {k} differs between write-through and rebuild:\n--- write-through ---\n{v}\n--- rebuild ---\n{}", b[k]);
    }
    assert!(a.contains_key("index.md"));
    assert!(a.contains_key("sources/emails/index.jsonl"));
    assert!(a.contains_key("records/contacts/index.md"));
}
/// An `Index::on_write` for one folder must not pull un-indexed sibling
/// folders into the layer and root rollups. Once the siblings are indexed
/// through `on_write` as well, the result must equal a full rebuild.
#[test]
fn loop_op_does_not_walk_sibling_content_tree() {
let (_d, store) = mk_store();
// Two company files exist on disk but are never indexed at this point.
write_doc(
&store,
"records/companies/acme.md",
"company",
Some("Acme Inc"),
Some("2026-05-05T00:00:00Z"),
"",
);
write_doc(
&store,
"records/companies/globex.md",
"company",
Some("Globex"),
Some("2026-05-06T00:00:00Z"),
"",
);
assert!(
!exists(&store, "records/companies/index.jsonl"),
"precondition: companies must be un-indexed"
);
// Only the contacts folder goes through the write-through path.
write_doc(
&store,
"records/contacts/sarah.md",
"contact",
Some("Sarah"),
Some("2026-05-15T00:00:00Z"),
"",
);
Index::on_write(&store, Path::new("records/contacts/sarah.md")).unwrap();
let layer_md = read(&store, "records/index.md");
let root_md = read(&store, "index.md");
assert!(
layer_md.contains("- [[records/contacts/index|Contacts]] (1) — Sarah\n"),
"layer must reflect the written folder:\n{layer_md}"
);
assert!(
root_md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
"root must reflect the written folder:\n{root_md}"
);
// The un-indexed sibling must be absent from both rollups.
assert!(
!layer_md.contains("companies"),
"loop op walked the sibling content tree: layer rollup counts un-indexed records/companies\n{layer_md}"
);
assert!(
!root_md.contains("companies"),
"loop op walked the sibling content tree: root rollup counts un-indexed records/companies\n{root_md}"
);
assert!(
root_md.contains("## Records (1)"),
"root layer total must count only the sidecar-indexed folder (1), not walked siblings (would be 3):\n{root_md}"
);
// Second store with the same three files, indexed by a full rebuild.
let (_d2, rb) = mk_store();
for (rel, t, s, u) in [
(
"records/companies/acme.md",
"company",
"Acme Inc",
"2026-05-05T00:00:00Z",
),
(
"records/companies/globex.md",
"company",
"Globex",
"2026-05-06T00:00:00Z",
),
(
"records/contacts/sarah.md",
"contact",
"Sarah",
"2026-05-15T00:00:00Z",
),
] {
write_doc(&rb, rel, t, Some(s), Some(u), "");
}
// Now index the company files in the first store through on_write too.
Index::on_write(&store, Path::new("records/companies/acme.md")).unwrap();
Index::on_write(&store, Path::new("records/companies/globex.md")).unwrap();
Index::rebuild_all(&rb).unwrap();
let a = snapshot_artifacts(&store);
let b = snapshot_artifacts(&rb);
assert_eq!(
a.keys().collect::<BTreeSet<_>>(),
b.keys().collect::<BTreeSet<_>>(),
"same artifact set after indexing both folders"
);
for (k, v) in &a {
assert_eq!(
v, &b[k],
"after indexing the sibling too, loop result must equal rebuild for {k}"
);
}
assert!(
read(&store, "index.md").contains("## Records (3)"),
"now that both folders are indexed, the root total is 3"
);
}
/// A path computed by `Store::shard_path_for` for a `wiki-page` must be one
/// the index can file: `on_write` succeeds on it and yields the same
/// artifacts as a full rebuild.
#[test]
fn wiki_page_at_shard_path_for_is_indexable_end_to_end() {
let (_d1, wt) = mk_store();
let (_d2, rb) = mk_store();
// Ask the store where a wiki-page with default frontmatter belongs.
let rel = wt
.shard_path_for(
"wiki-page",
&crate::parser::Frontmatter::default(),
"renewal-theme",
)
.unwrap();
let rel_str = path_to_unix(&rel);
assert!(
type_folder_of(&rel).is_some(),
"shard_path_for produced a path the index cannot file: {rel_str}"
);
// Same document in both stores: `wt` is indexed by on_write, `rb` by rebuild.
write_doc(
&wt,
&rel_str,
"wiki-page",
Some("Renewal theme"),
Some("2026-05-21T10:00:00Z"),
"",
);
write_doc(
&rb,
&rel_str,
"wiki-page",
Some("Renewal theme"),
Some("2026-05-21T10:00:00Z"),
"",
);
Index::on_write(&wt, &rel)
.expect("on_write must succeed for a toolkit-computed wiki-page path");
Index::rebuild_all(&rb).unwrap();
// NOTE(review): the assertions below hard-code `records/wiki-page` as the
// type folder; this presumably matches what shard_path_for returns — confirm.
let page_link = wiki_target(&rel); let tf_md = read(&rb, "records/wiki-page/index.md");
assert!(
tf_md.contains(&format!("[[{page_link}]]")),
"type-folder index must list the page link, got:\n{tf_md}"
);
assert!(
exists(&rb, "records/wiki-page/index.jsonl"),
"type-folder jsonl must exist"
);
assert!(
read(&rb, "records/wiki-page/index.jsonl").contains(&rel_str),
"type-folder jsonl must contain the page row"
);
let layer_md = read(&rb, "records/index.md");
assert!(
layer_md.contains("records/wiki-page/index"),
"layer index must roll up the records/wiki-page type-folder, got:\n{layer_md}"
);
// Write-through and rebuild must agree on every artifact.
let a = snapshot_artifacts(&wt);
let b = snapshot_artifacts(&rb);
assert_eq!(
a.keys().collect::<Vec<_>>(),
b.keys().collect::<Vec<_>>(),
"loop and sweep must produce the same artifact set"
);
for (k, v) in &a {
assert_eq!(
v, &b[k],
"wiki-page artifact {k} differs between on_write and rebuild"
);
}
}
/// Removing the newest file of an over-cap folder through `Index::on_remove`
/// must leave the same artifacts as deleting the file and rebuilding from
/// scratch. The markdown view must stay at `MD_CAP` entries by pulling in the
/// next-most-recent file, and both the footer count and the JSONL must lose
/// exactly the removed file.
#[test]
fn on_remove_then_rebuild_match_and_pull_in_next_over_cap() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();
    let total = MD_CAP + 3;
    let mut all_rels = Vec::new();
    for i in 0..total {
        let rel = format!("sources/emails/2026/05/m-{i:04}.md");
        // Timestamps increase with `i`, so index order equals recency order.
        let updated = format!("2026-05-10T00:{:02}:{:02}Z", i / 60, i % 60);
        let summary = format!("mail {i}");
        for store in [&wt, &rb] {
            write_doc(store, &rel, "email", Some(&summary), Some(&updated), "");
        }
        all_rels.push(rel);
    }
    Index::rebuild_all(&wt).unwrap();

    // Write-through store: delete the newest file and update incrementally.
    let newest = &all_rels[total - 1];
    fs::remove_file(wt.root.join(newest)).unwrap();
    Index::on_remove(&wt, Path::new(newest)).unwrap();
    // Reference store: delete the same file and rebuild everything.
    fs::remove_file(rb.root.join(newest)).unwrap();
    Index::rebuild_all(&rb).unwrap();

    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    // Compare the key sets first: otherwise an artifact present only in the
    // rebuilt store would go unnoticed, and one missing from it would panic
    // on `b[k]` with an unhelpful "key not found".
    assert_eq!(
        a.keys().collect::<BTreeSet<_>>(),
        b.keys().collect::<BTreeSet<_>>(),
        "after remove, write-through and rebuild must produce the same artifact set"
    );
    for (k, v) in &a {
        assert_eq!(v, &b[k], "after remove, artifact {k} drifted from rebuild");
    }

    let md = read(&wt, "sources/emails/index.md");
    assert_eq!(md.lines().filter(|l| l.starts_with("- [[")).count(), MD_CAP);
    assert!(
        !md.contains(&format!("[[{}]]", wiki_target(Path::new(newest)))),
        "removed file must not be listed in md"
    );
    // Of the `total - 1` remaining files the newest MD_CAP are listed, so the
    // oldest listed one has index `total - 1 - MD_CAP`; it was just below the
    // cap before the removal.
    let pulled_in = &all_rels[total - 1 - MD_CAP];
    assert!(
        md.contains(&format!("[[{}]]", wiki_target(Path::new(pulled_in)))),
        "the 501st-most-recent must be pulled into the browse view after a removal"
    );
    assert!(
        md.contains(&format!("This folder has {} files.", total - 1)),
        "footer count must decrement:\n{}",
        md.lines().rev().take(4).collect::<Vec<_>>().join("\n")
    );

    let jsonl = read(&wt, "sources/emails/index.jsonl");
    assert_eq!(
        jsonl.lines().count(),
        total - 1,
        "jsonl loses exactly the removed file"
    );
    assert!(
        !jsonl.contains(&path_to_unix(Path::new(newest))),
        "removed file must be gone from the jsonl too"
    );
}
/// Moving a file from one type folder to another and reporting it through
/// `Index::on_rename` must produce the same artifacts as performing the move
/// and rebuilding from scratch.
#[test]
fn on_rename_cross_folder_matches_rebuild() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();
    let seed = [
        ("records/contacts/a.md", "contact", "A", "2026-05-01T00:00:00Z"),
        ("records/contacts/b.md", "contact", "B", "2026-05-02T00:00:00Z"),
        ("records/companies/x.md", "company", "X", "2026-05-03T00:00:00Z"),
    ];
    for (rel, t, s, u) in seed {
        write_doc(&wt, rel, t, Some(s), Some(u), "");
        write_doc(&rb, rel, t, Some(s), Some(u), "");
    }
    Index::rebuild_all(&wt).unwrap();

    let old = "records/contacts/b.md";
    let new = "records/companies/b.md";
    // Performs the on-disk move inside the given store.
    let relocate = |target: &Store| {
        fs::create_dir_all(target.root.join("records/companies")).unwrap();
        fs::rename(target.root.join(old), target.root.join(new)).unwrap();
    };
    relocate(&wt);
    Index::on_rename(&wt, Path::new(old), Path::new(new)).unwrap();
    relocate(&rb);
    Index::rebuild_all(&rb).unwrap();

    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    assert_eq!(a.keys().collect::<Vec<_>>(), b.keys().collect::<Vec<_>>());
    for (k, v) in &a {
        assert_eq!(v, &b[k], "rename: artifact {k} drifted from rebuild");
    }

    let contacts = read(&wt, "records/contacts/index.md");
    assert!(!contacts.contains("records/contacts/b]]"));
    let companies = read(&wt, "records/companies/index.md");
    assert!(companies.contains("[[records/companies/b]]"));
}
/// Re-indexing an already indexed file replaces its entry rather than adding
/// a second one, and the index `updated` stamp follows the newer revision.
#[test]
fn on_write_updates_existing_entry_in_place() {
    let (_d, store) = mk_store();
    let rel = "records/contacts/a.md";
    // Two successive revisions of the same file, each indexed after writing.
    for (summary, updated) in [
        ("Original", "2026-05-01T00:00:00Z"),
        ("Revised", "2026-05-09T00:00:00Z"),
    ] {
        write_doc(&store, rel, "contact", Some(summary), Some(updated), "");
        Index::on_write(&store, Path::new(rel)).unwrap();
    }

    let jsonl = read(&store, "records/contacts/index.jsonl");
    assert_eq!(
        jsonl.lines().count(),
        1,
        "upsert must not duplicate the line"
    );
    assert!(jsonl.contains("Revised"), "jsonl must reflect the update");
    assert!(
        !jsonl.contains("Original"),
        "stale line must be gone (compacted)"
    );

    let md = read(&store, "records/contacts/index.md");
    assert!(md.contains("- [[records/contacts/a]] — Revised\n"));
    assert!(
        md.contains("updated: 2026-05-09T00:00:00Z\n"),
        "index updated must track the newer member"
    );
}
/// A dry run renders both artifacts behind `--- <path> ---` separators and
/// leaves the store untouched.
#[test]
fn dry_run_emits_separators_and_writes_nothing() {
    let (_d, store) = mk_store();
    write_doc(
        &store,
        "sources/emails/2026/05/a.md",
        "email",
        Some("Mail"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );
    let level = IndexLevel::TypeFolder("sources/emails".into());
    let out = Index::render_dry_run(&store, &level).unwrap();

    let md_separator = "--- sources/emails/index.md ---\n";
    assert!(out.contains(md_separator), "md separator:\n{out}");
    let jsonl_separator = "--- sources/emails/index.jsonl ---\n";
    assert!(out.contains(jsonl_separator), "jsonl separator:\n{out}");
    assert!(
        out.contains("- [[sources/emails/2026/05/a]] — Mail"),
        "md body present"
    );

    assert!(
        !exists(&store, "sources/emails/index.md"),
        "dry-run must not write"
    );
    assert!(
        !exists(&store, "sources/emails/index.jsonl"),
        "dry-run must not write"
    );
}
/// `Index::cleanup` deletes index files sitting inside a shard directory and
/// index files of folders without content, while leaving real documents in
/// place.
#[test]
fn cleanup_removes_noncanonical_and_empty_indexes() {
    let (_d, store) = mk_store();
    write_doc(
        &store,
        "sources/emails/2026/05/a.md",
        "email",
        Some("Mail"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );
    let shard_md = "sources/emails/2026/05/index.md";
    let shard_jsonl = "sources/emails/2026/05/index.jsonl";
    let empty_md = "records/empty/index.md";
    // Plant stale artifacts inside the shard directory ...
    for stale in [shard_md, shard_jsonl] {
        fs::write(store.root.join(stale), "stale\n").unwrap();
    }
    // ... and in a folder that holds no documents at all.
    fs::create_dir_all(store.root.join("records/empty")).unwrap();
    fs::write(store.root.join(empty_md), "stale\n").unwrap();

    Index::cleanup(&store).unwrap();

    assert!(!exists(&store, shard_md), "shard index must be deleted");
    assert!(!exists(&store, shard_jsonl), "shard jsonl must be deleted");
    assert!(
        !exists(&store, empty_md),
        "empty-folder index must be deleted"
    );
    assert!(exists(&store, "sources/emails/2026/05/a.md"));
}
/// After the only document is deleted, a rebuild removes the type-folder,
/// layer and root index files that the first rebuild had created.
#[test]
fn rebuild_deletes_stale_indexes_for_emptied_folders() {
    let (_d, store) = mk_store();
    let only_doc = "records/contacts/a.md";
    write_doc(
        &store,
        only_doc,
        "contact",
        Some("A"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );
    Index::rebuild_all(&store).unwrap();
    assert!(exists(&store, "records/contacts/index.md"));
    assert!(exists(&store, "records/index.md"));
    assert!(exists(&store, "index.md"));

    // Empty the folder and rebuild again.
    fs::remove_file(store.root.join(only_doc)).unwrap();
    Index::rebuild_all(&store).unwrap();

    let type_folder_gone = !exists(&store, "records/contacts/index.md");
    assert!(type_folder_gone, "emptied type-folder index gone");
    let layer_gone = !exists(&store, "records/index.md");
    assert!(layer_gone, "now-empty layer index gone");
    let root_gone = !exists(&store, "index.md");
    assert!(root_gone, "now-empty root index gone");
}
/// Property-style check: after a fixed pseudo-random sequence of writes,
/// removals and renames, the store maintained through `on_write`,
/// `on_remove` and `on_rename` must hold exactly the artifacts that a single
/// `rebuild_all` produces on a store with the same final files.
#[test]
fn property_writethrough_equals_rebuild_under_mixed_ops() {
let (_d1, wt) = mk_store();
let (_d2, rb) = mk_store();
// Fixed-seed linear congruential generator, so every run replays the same
// operation sequence.
let mut seed: u64 = 0x9E3779B97F4A7C15;
let mut next = || {
seed = seed
.wrapping_mul(6364136223846793005)
.wrapping_add(1442695040888963407);
(seed >> 33) as u32
};
// `folders[i]` is paired with `types[i]`.
let folders = ["sources/emails", "records/contacts", "records/profiles"];
let types = ["email", "contact", "profile"];
// Store-relative paths of the files that currently exist.
let mut live: Vec<String> = Vec::new();
for step in 0..120u32 {
let r = next();
let op = r % 10;
// op 0-5: write; op 6-7: remove; op 8-9: rename. A write is forced while
// no file is live.
if op < 6 || live.is_empty() {
let fi = (next() as usize) % folders.len();
let folder = folders[fi];
// Written files use ids 0..40, so a repeated id overwrites an existing file.
let id = next() % 40;
let rel = if folder == "sources/emails" {
let month = 5 + (id % 2); format!("{folder}/2026/{month:02}/f-{id:02}.md")
} else {
format!("{folder}/f-{id:02}.md")
};
let updated = format!(
"2026-05-{:02}T{:02}:{:02}:00Z",
1 + (step % 27),
step % 24,
id % 60
);
let extra = if id % 3 == 0 {
"tags:\n - x\n - y\n"
} else {
""
};
write_doc(
&wt,
&rel,
types[fi],
Some(&format!("sum {step}")),
Some(&updated),
extra,
);
write_doc(
&rb,
&rel,
types[fi],
Some(&format!("sum {step}")),
Some(&updated),
extra,
);
// Only the write-through store is indexed incrementally.
Index::on_write(&wt, Path::new(&rel)).unwrap();
if !live.contains(&rel) {
live.push(rel);
}
} else if op < 8 {
let idx = (next() as usize) % live.len();
let rel = live.remove(idx);
fs::remove_file(wt.root.join(&rel)).unwrap();
// Removal from the reference store ignores errors.
fs::remove_file(rb.root.join(&rel)).ok();
Index::on_remove(&wt, Path::new(&rel)).unwrap();
} else {
let idx = (next() as usize) % live.len();
let old = live[idx].clone();
let fi = (next() as usize) % folders.len();
let folder = folders[fi];
// Rename targets use ids 50..90, which written files never use.
let id = 50 + (next() % 40);
let new = if folder == "sources/emails" {
format!("{folder}/2026/05/f-{id:02}.md")
} else {
format!("{folder}/f-{id:02}.md")
};
// Skip renames onto the same path or onto another live file.
if new == old || live.contains(&new) {
continue;
}
fs::create_dir_all(wt.root.join(&new).parent().unwrap()).unwrap();
fs::create_dir_all(rb.root.join(&new).parent().unwrap()).unwrap();
fs::rename(wt.root.join(&old), wt.root.join(&new)).unwrap();
fs::rename(rb.root.join(&old), rb.root.join(&new)).unwrap();
Index::on_rename(&wt, Path::new(&old), Path::new(&new)).unwrap();
live[idx] = new;
}
}
// The reference store is indexed once, at the very end.
Index::rebuild_all(&rb).unwrap();
let a = snapshot_artifacts(&wt);
let b = snapshot_artifacts(&rb);
assert_eq!(
a.keys().collect::<BTreeSet<_>>(),
b.keys().collect::<BTreeSet<_>>(),
"write-through and rebuild must produce the same set of artifacts"
);
for (k, v) in &a {
assert_eq!(
v, &b[k],
"INVARIANT VIOLATED: artifact {k} differs after mixed ops\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
b[k]
);
}
assert!(
!a.is_empty(),
"the run must have produced at least one artifact"
);
}
/// A real document that happens to be named `index.md` inside a shard must
/// survive both `Index::cleanup` and `Index::rebuild_all` with its content
/// intact.
#[test]
fn cleanup_preserves_user_content_named_index_md_in_shard() {
    let (_d, store) = mk_store();
    let user_doc = "sources/emails/2026/06/index.md";
    write_doc(
        &store,
        user_doc,
        "email",
        Some("Important imported mail"),
        Some("2026-06-11T04:23:25Z"),
        "",
    );

    Index::cleanup(&store).unwrap();
    assert!(
        exists(&store, user_doc),
        "cleanup must not delete a user content file named index.md"
    );

    Index::rebuild_all(&store).unwrap();
    assert!(
        exists(&store, user_doc),
        "rebuild_all must not delete a user content file named index.md"
    );

    let kept = read(&store, user_doc);
    assert!(
        kept.contains("Important imported mail"),
        "the user's record content must be intact"
    );
}
/// `Index::cleanup` must keep the `index.md` / `index.jsonl` pair that sits
/// at the root of a non-empty type folder.
#[test]
fn cleanup_keeps_canonical_type_folder_root_sidecars() {
    let (_d, store) = mk_store();
    write_doc(
        &store,
        "records/contacts/alice.md",
        "contact",
        Some("Alice"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );
    let level = IndexLevel::TypeFolder("records/contacts".into());
    Index::write_level(&store, &level).unwrap();
    assert!(exists(&store, "records/contacts/index.md"));
    assert!(exists(&store, "records/contacts/index.jsonl"));

    Index::cleanup(&store).unwrap();

    let md_kept = exists(&store, "records/contacts/index.md");
    assert!(
        md_kept,
        "cleanup must keep the canonical type-folder index.md (non-empty folder)"
    );
    let jsonl_kept = exists(&store, "records/contacts/index.jsonl");
    assert!(
        jsonl_kept,
        "cleanup must keep the canonical type-folder index.jsonl (non-empty folder)"
    );
}
/// Calling `Index::on_write` on an index artifact itself must not add the
/// artifact as a catalogued row or inflate any count.
#[test]
fn on_write_ignores_index_artifact_no_phantom_row() {
    let (_d, store) = mk_store();
    let catalog = "records/contacts/index.jsonl";
    write_doc(
        &store,
        "records/contacts/alice.md",
        "contact",
        Some("Alice"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );
    Index::on_write(&store, Path::new("records/contacts/alice.md")).unwrap();
    let jsonl_before = read(&store, catalog);
    assert_eq!(jsonl_before.lines().count(), 1);

    // Report a write on the generated index.md itself.
    Index::on_write(&store, Path::new("records/contacts/index.md")).unwrap();

    let jsonl_after = read(&store, catalog);
    assert_eq!(
        jsonl_after.lines().count(),
        1,
        "on_write on index.md must not add a phantom self-row"
    );
    assert!(
        !jsonl_after.contains("\"type\":\"index\""),
        "the catalog artifact must never appear as a catalogued row"
    );
    let root = read(&store, "index.md");
    assert!(
        root.contains("[[records/contacts/index|Contacts]] (1)"),
        "count must not inflate:\n{root}"
    );
}
/// A multi-line summary whose second line looks like an index entry must be
/// rendered as one entry line, with the newline collapsed to a space.
#[test]
fn multiline_summary_is_single_lined_in_index_md() {
    let (_d, store) = mk_store();
    let frontmatter = "type: note\nupdated: 2026-06-10T00:00:00Z\nsummary: |-\n legit first line\n - [[records/secrets/fake|Click me]] — injected entry";
    write_raw(&store, "records/notes/evil.md", frontmatter, "\nbody\n");

    let idx = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
    let md = idx.to_markdown();

    let entry_lines = md
        .lines()
        .filter(|line| line.starts_with("- [["))
        .count();
    assert_eq!(
        entry_lines, 1,
        "a multi-line summary must not produce extra entry lines:\n{md}"
    );
    let collapsed = "- [[records/notes/evil]] — legit first line - [[records/secrets/fake|Click me]] — injected entry\n";
    assert!(
        md.contains(collapsed),
        "summary newlines must collapse to spaces inline:\n{md}"
    );
}
/// Scalar frontmatter values that are not strings (a number as `summary`, a
/// boolean as `type`) are expected to be carried over as their text form
/// instead of being dropped.
#[test]
fn non_string_scalar_summary_and_type_are_coerced_like_validator() {
    let (_d, store) = mk_store();
    // Projects the store-relative file `rel` into an index record.
    let project =
        |rel: &str| record_from_file(&store.root.join(rel), PathBuf::from(rel)).unwrap();

    write_raw(
        &store,
        "records/contacts/a.md",
        "type: contact\nupdated: 2026-05-01T00:00:00Z\nsummary: 2026",
        "\nbody\n",
    );
    let rec_a = project("records/contacts/a.md");
    assert_eq!(rec_a.summary, "2026");
    assert_eq!(rec_a.type_, "contact");

    let idx = Index::build_type_folder(&store, Path::new("records/contacts")).unwrap();
    let md = idx.to_markdown();
    assert!(
        md.contains("- [[records/contacts/a]] — 2026\n"),
        "index entry must hold the coerced scalar, not the placeholder:\n{md}"
    );

    write_raw(
        &store,
        "records/contacts/b.md",
        "type: true\nupdated: 2026-05-02T00:00:00Z\nsummary: hi",
        "\nbody\n",
    );
    let rec_b = project("records/contacts/b.md");
    assert_eq!(rec_b.type_, "true");
}
/// A file whose body contains a byte that is not valid UTF-8 must still be
/// projected from its frontmatter and end up in the rebuilt catalog.
#[test]
fn non_utf8_body_does_not_abort_record_projection() {
    let (_d, store) = mk_store();
    let rel = "sources/emails/2026/06/x.md";
    let abs = store.root.join(rel);
    fs::create_dir_all(abs.parent().unwrap()).unwrap();

    // Valid frontmatter, then a body with a lone 0xE9 byte in the middle.
    let parts: [&[u8]; 3] = [
        b"---\ntype: email\nupdated: 2026-06-11T00:00:00Z\nsummary: An imported email\n---\n\nCaf",
        &[0xE9],
        b" meeting notes\n",
    ];
    let bytes: Vec<u8> = parts.concat();
    fs::write(&abs, bytes).unwrap();

    let rec = record_from_file(&abs, PathBuf::from(rel))
        .expect("non-UTF-8 body must not abort the frontmatter read");
    assert_eq!(rec.summary, "An imported email");
    assert_eq!(rec.type_, "email");

    Index::rebuild_all(&store).unwrap();
    let catalog = "sources/emails/index.jsonl";
    assert!(
        exists(&store, catalog),
        "rebuild must produce the catalog despite a non-UTF-8 body byte"
    );
    assert!(
        read(&store, catalog).contains("An imported email"),
        "the record must be catalogued"
    );
}
/// A rebuild that hits a file with malformed frontmatter must fail, and the
/// catalogs written by the earlier successful rebuild must still be there
/// afterwards.
#[test]
fn rebuild_aborts_on_malformed_file_and_keeps_prior_catalogs() {
    let (_d, store) = mk_store();
    for (rel, type_, summary, updated) in [
        ("records/contacts/alice.md", "contact", "Alice", "2026-05-01T00:00:00Z"),
        ("records/companies/acme.md", "company", "Acme", "2026-05-02T00:00:00Z"),
    ] {
        write_doc(&store, rel, type_, Some(summary), Some(updated), "");
    }
    Index::rebuild_all(&store).expect("clean rebuild succeeds");
    let contacts_catalog = "records/contacts/index.jsonl";
    let companies_catalog = "records/companies/index.jsonl";
    assert!(exists(&store, contacts_catalog));
    assert!(exists(&store, companies_catalog));

    // Frontmatter with an unterminated quoted string.
    let bad = store.root.join("records/contacts/broken.md");
    let malformed = "---\ntype: contact\nsummary: \"unterminated\n---\nbody\n";
    fs::write(&bad, malformed).unwrap();

    Index::rebuild_all(&store)
        .expect_err("rebuild must abort, not silently skip, on a malformed file");

    assert!(
        exists(&store, companies_catalog),
        "an aborted rebuild must not destroy a clean sibling folder's catalog"
    );
    assert!(
        exists(&store, contacts_catalog),
        "an aborted rebuild must not destroy the affected folder's prior catalog"
    );
    let contacts_jsonl = read(&store, contacts_catalog);
    assert!(contacts_jsonl.contains("records/contacts/alice.md"));
}
/// The counts shown in the layer and root rollups after `rebuild_all` must
/// equal the number of records in the type folder's JSONL, and the rebuilt
/// artifacts must be byte-identical to those produced by `on_write`.
#[test]
fn rebuild_rollup_counts_equal_jsonl_records_and_write_through() {
let (_d, store) = mk_store();
write_doc(
&store,
"records/contacts/alice.md",
"contact",
Some("Alice"),
Some("2026-05-01T00:00:00Z"),
"",
);
write_doc(
&store,
"records/contacts/bob.md",
"contact",
Some("Bob"),
Some("2026-05-02T00:00:00Z"),
"",
);
Index::rebuild_all(&store).expect("clean rebuild succeeds");
// Count only non-blank JSONL lines.
let jsonl_lines = read(&store, "records/contacts/index.jsonl")
.lines()
.filter(|l| !l.trim().is_empty())
.count();
assert_eq!(jsonl_lines, 2, "two well-formed files ⇒ two jsonl records");
let layer_md = read(&store, "records/index.md");
let root_md = read(&store, "index.md");
assert!(
layer_md.contains("- [[records/contacts/index|Contacts]] (2)"),
"layer rollup (N) must equal the jsonl record count (2), not a raw .md walk:\n{layer_md}"
);
assert!(
root_md.contains("- [[records/contacts/index|Contacts]] (2)\n")
&& root_md.contains("## Records (2)"),
"root rollup (N)/layer total must equal the jsonl record count (2):\n{root_md}"
);
// Second store with the same two files, indexed through on_write instead.
let (_d2, wt) = mk_store();
write_doc(
&wt,
"records/contacts/alice.md",
"contact",
Some("Alice"),
Some("2026-05-01T00:00:00Z"),
"",
);
write_doc(
&wt,
"records/contacts/bob.md",
"contact",
Some("Bob"),
Some("2026-05-02T00:00:00Z"),
"",
);
Index::on_write(&wt, Path::new("records/contacts/alice.md")).unwrap();
Index::on_write(&wt, Path::new("records/contacts/bob.md")).unwrap();
// Here `a` is the write-through store and `b` the rebuilt one.
let a = snapshot_artifacts(&wt);
let b = snapshot_artifacts(&store);
assert_eq!(
a.keys().collect::<BTreeSet<_>>(),
b.keys().collect::<BTreeSet<_>>(),
"write-through and rebuild_all must produce the same artifact set"
);
for (k, v) in &a {
assert_eq!(
v, &b[k],
"rollup bytes diverged between write-through and rebuild_all for {k} \
(a skip-version inflates rebuild_all's (N) above the jsonl record \
count, which write-through then rewrites):\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
b[k]
);
}
}
/// `path_to_unix` must keep a path component that is not valid UTF-8 rather
/// than dropping it and returning only the parent directory.
///
/// Unix-only: the test builds the file name from raw bytes via `OsStrExt`.
/// The leaf is `caf`, a lone `0xE9` byte, then `.md`. The `0xE9` byte is not
/// followed by a continuation byte, so the name is not valid UTF-8.
#[cfg(unix)]
#[test]
fn non_utf8_path_component_is_kept_not_dropped() {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    // Same bytes as pushing 0xE9 and ".md" onto "caf".
    let leaf: &[u8] = b"caf\xE9.md";
    let folder = Path::new("sources/emails");
    let p = folder.join(OsStr::from_bytes(leaf));

    let unix = path_to_unix(&p);

    assert_ne!(
        unix, "sources/emails",
        "non-UTF-8 leaf must not be dropped, collapsing the path to its parent dir"
    );
    // Only the valid prefix is pinned; the rendering of the bad byte is left open.
    assert!(
        unix.starts_with("sources/emails/caf"),
        "the lossy leaf must remain under its folder: {unix}"
    );
}
}