use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use chrono::{DateTime, Datelike, FixedOffset};
use grep::regex::RegexMatcher;
use grep::searcher::sinks::UTF8;
use grep::searcher::Searcher;
use ignore::WalkBuilder;
use crate::index::IndexRecord;
use crate::parser::{parse_db_md, Config, Frontmatter};
/// Markdown basenames treated as store metadata rather than content;
/// `is_non_content_basename` checks against this list.
const NON_CONTENT_BASENAMES: [&str; 3] = ["DB.md", "index.md", "log.md"];
/// File name of the line-delimited JSON sidecar index read by `read_type_index`.
const TYPE_INDEX_FILE: &str = "index.jsonl";
/// Error returned by [`Store::open`] when the directory has no `DB.md` marker.
#[derive(Debug, thiserror::Error)]
#[error("not a db.md store: {path} has no DB.md")]
pub struct NotAStore {
/// The directory that was probed and rejected.
pub path: PathBuf,
}
/// Errors produced by [`Store`] operations on an already-opened store.
#[derive(Debug, thiserror::Error)]
pub enum StoreError {
/// A type-index sidecar could not be read, or one of its lines failed to parse.
#[error("failed to read type index {path}: {message}")]
BadTypeIndex {
path: PathBuf,
message: String,
},
/// A sharding type had no usable date to derive a `<year>/<month>` segment from.
#[error("cannot compute shard path for {file}: no usable date field")]
NoShardDate {
file: PathBuf,
},
/// Directory traversal or backlink searching failed under `root`.
#[error("search failed under {root}: {message}")]
Search {
root: PathBuf,
message: String,
},
/// Any other I/O failure, passed through unchanged.
#[error(transparent)]
Io(#[from] std::io::Error),
}
/// One of the three top-level content layers of a store.
///
/// Variant order matters: the derived `Ord` sorts `Sources`, then `Records`,
/// then `Wiki`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Layer {
    Sources,
    Records,
    Wiki,
}

impl Layer {
    /// Name of the directory (relative to the store root) backing this layer.
    pub fn dir_name(self) -> &'static str {
        match self {
            Self::Sources => "sources",
            Self::Records => "records",
            Self::Wiki => "wiki",
        }
    }

    /// Inverse of [`Layer::dir_name`]; the comparison is exact and
    /// case-sensitive, so unknown names yield `None`.
    pub fn from_dir_name(name: &str) -> Option<Self> {
        Self::all()
            .iter()
            .copied()
            .find(|layer| layer.dir_name() == name)
    }

    /// Every layer, in canonical (`Ord`) order.
    pub fn all() -> [Layer; 3] {
        [Self::Sources, Self::Records, Self::Wiki]
    }
}
/// Handle to an opened store: its root directory plus the configuration that
/// [`Store::open`] parsed from `DB.md`.
#[derive(Debug, Clone)]
pub struct Store {
/// Root directory of the store, as passed to [`Store::open`].
pub root: PathBuf,
/// Configuration parsed from `DB.md`, or the default when parsing failed.
pub config: Config,
}
impl Store {
/// Reports whether `path` is a directory that directly contains a `DB.md`
/// marker file.
///
/// The directory listing is scanned for an entry named exactly `DB.md`, so a
/// differently-cased name such as `db.md` never counts, whatever the
/// filesystem's case rules are. The entry must resolve to a regular file:
/// symlinks are followed, so a link to a directory or a dangling link is
/// rejected. Any I/O error yields `false`.
pub fn is_db_md_store(path: &Path) -> bool {
    let entries = match std::fs::read_dir(path) {
        Ok(entries) => entries,
        Err(_) => return false,
    };
    entries
        .flatten()
        .filter(|entry| entry.file_name() == "DB.md")
        .any(|entry| {
            // `DirEntry::file_type` does not traverse symlinks; `fs::metadata`
            // does, which is what tells us the marker is really a file.
            std::fs::metadata(entry.path())
                .map(|meta| meta.is_file())
                .unwrap_or(false)
        })
}
/// Opens the store rooted at `path`.
///
/// Configuration loading is best-effort: if `DB.md` cannot be read, or
/// `parse_db_md` fails on it, the store opens with `Config::default()`.
///
/// # Errors
/// Returns [`NotAStore`] when [`Store::is_db_md_store`] rejects `path`.
pub fn open(path: &Path) -> Result<Store, NotAStore> {
    let root = path.to_path_buf();
    if !Store::is_db_md_store(path) {
        return Err(NotAStore { path: root });
    }
    let marker = root.join("DB.md");
    let config = std::fs::read_to_string(&marker)
        .ok()
        .map(|text| parse_db_md(&text, &marker).unwrap_or_default())
        .unwrap_or_default();
    Ok(Store { root, config })
}
/// Lists every content markdown file across all layers, as sorted
/// store-relative paths.
///
/// # Errors
/// Propagates any [`StoreError`] from [`Store::walk_layer`].
pub fn walk(&self) -> Result<Vec<PathBuf>, StoreError> {
    let mut files = Vec::new();
    for layer in Layer::all() {
        let mut found = self.walk_layer(layer)?;
        files.append(&mut found);
    }
    files.sort();
    Ok(files)
}
/// Lists the content markdown files of one layer; a layer whose directory
/// does not exist yields an empty list rather than an error.
pub fn walk_layer(&self, layer: Layer) -> Result<Vec<PathBuf>, StoreError> {
    let dir = self.root.join(layer.dir_name());
    if dir.is_dir() {
        self.walk_content_md(&dir)
    } else {
        Ok(Vec::new())
    }
}
/// Lists the content markdown files under `type_folder`, which may be
/// absolute or relative to the store root. A missing folder yields an empty
/// list.
pub fn walk_type_folder(&self, type_folder: &Path) -> Result<Vec<PathBuf>, StoreError> {
    let dir = self.resolve_under_root(type_folder);
    if dir.is_dir() {
        self.walk_content_md(&dir)
    } else {
        Ok(Vec::new())
    }
}
/// Returns up to `n` files from `type_folder`, most recently `updated`
/// first.
///
/// Ties on `updated` are broken by ascending path. Files whose `updated`
/// value cannot be read sort last, because `None` orders below every `Some`
/// and the comparison is reversed.
pub fn recent_in_type_folder(
    &self,
    type_folder: &Path,
    n: usize,
) -> Result<Vec<PathBuf>, StoreError> {
    let mut dated: Vec<(Option<DateTime<FixedOffset>>, PathBuf)> = Vec::new();
    for rel in self.walk_type_folder(type_folder)? {
        let stamp = self.read_updated(&self.abs_path(&rel));
        dated.push((stamp, rel));
    }
    dated.sort_by(|(lhs_stamp, lhs_path), (rhs_stamp, rhs_path)| {
        rhs_stamp
            .cmp(lhs_stamp)
            .then_with(|| lhs_path.cmp(rhs_path))
    });
    Ok(dated.into_iter().take(n).map(|(_, rel)| rel).collect())
}
/// Reports whether files of `type_` are stored under `<year>/<month>` shard
/// folders rather than directly in the type folder.
pub fn type_shards(&self, type_: &str) -> bool {
    const SHARDED_TYPES: [&str; 9] = [
        "email",
        "transcript",
        "pdf-source",
        "expense",
        "invoice",
        "meeting",
        "order",
        "ticket",
        "transaction",
    ];
    SHARDED_TYPES.contains(&type_)
}
/// Computes the store-relative path for a new file of `type_`, placed in
/// that type's default folder. See [`Store::shard_path_in`] for the rules.
pub fn shard_path_for(
    &self,
    type_: &str,
    frontmatter: &Frontmatter,
    name: &str,
) -> Result<PathBuf, StoreError> {
    let folder = default_type_folder(type_);
    self.shard_path_in(&folder, type_, frontmatter, name)
}
/// Computes the path of `name` inside `folder`, adding a `.md` extension if
/// it is missing.
///
/// Non-sharding types land directly in `folder`. Sharding types land in
/// `folder/<year>/<month>/`, with the segment taken from the frontmatter.
///
/// # Errors
/// Returns [`StoreError::NoShardDate`] (carrying the unsharded path) when a
/// sharding type has no usable date.
pub fn shard_path_in(
    &self,
    folder: &Path,
    type_: &str,
    frontmatter: &Frontmatter,
    name: &str,
) -> Result<PathBuf, StoreError> {
    let filename = ensure_md_extension(name);
    if !self.type_shards(type_) {
        return Ok(folder.join(filename));
    }
    match self.primary_shard_segment(type_, frontmatter) {
        Some((year, month)) => {
            let mut sharded = folder.to_path_buf();
            sharded.push(year);
            sharded.push(month);
            sharded.push(filename);
            Ok(sharded)
        }
        None => Err(StoreError::NoShardDate {
            file: folder.join(&filename),
        }),
    }
}
/// Finds every markdown file that contains a wiki-link to `target`; the
/// single-target form of [`Store::find_links_to_any`].
pub fn find_links_to(&self, target: &Path) -> Result<Vec<PathBuf>, StoreError> {
    let targets = [target.to_path_buf()];
    self.find_links_to_any(&targets)
}
/// Finds every markdown file that links to at least one of `targets`.
///
/// Each target is rendered to link text by `path_to_link_str` and matched as
/// `[[target]]`, `[[target.md]]`, or either form followed by `|label`. The
/// closing `]]` anchors the match, so `[[target-jr]]` does not match
/// `target`. Targets that render to empty link text are ignored; with no
/// usable targets the result is empty and nothing is searched.
///
/// Returns sorted, de-duplicated store-relative paths.
///
/// # Errors
/// Returns [`StoreError::Search`] if the pattern cannot be compiled, the
/// walk fails, or a file cannot be searched.
pub fn find_links_to_any(&self, targets: &[PathBuf]) -> Result<Vec<PathBuf>, StoreError> {
    let arms: Vec<String> = targets
        .iter()
        .map(|target| path_to_link_str(target))
        .filter(|link| !link.is_empty())
        .map(|link| {
            format!(
                r"\[\[{}(\.md)?(\|[^\]]*)?\]\]",
                regex::escape(&link)
            )
        })
        .collect();
    if arms.is_empty() {
        // An empty alternation would match every file.
        return Ok(Vec::new());
    }
    let pattern = arms.join("|");
    let matcher = RegexMatcher::new(&pattern).map_err(|e| StoreError::Search {
        root: self.root.clone(),
        message: format!("invalid backlink pattern: {e}"),
    })?;
    // One searcher serves every file, so it is built once rather than per
    // loop iteration.
    let mut searcher = Searcher::new();
    let mut hits = std::collections::BTreeSet::new();
    for rel in self.walk_all_md()? {
        let abs = self.abs_path(&rel);
        let mut matched_here = false;
        searcher
            .search_path(
                &matcher,
                &abs,
                UTF8(|_lnum, _line| {
                    matched_here = true;
                    // Returning `false` stops the search after the first hit.
                    Ok(false)
                }),
            )
            .map_err(|e| StoreError::Search {
                root: self.root.clone(),
                message: format!("search failed in {}: {e}", abs.display()),
            })?;
        if matched_here {
            hits.insert(rel);
        }
    }
    Ok(hits.into_iter().collect())
}
/// Returns the index records whose type equals `type_`.
///
/// The sidecar in the type's default folder is used when it exists.
/// Otherwise every sidecar in that folder's layer is read. Either way the
/// records are then filtered by type.
pub fn find_by_type(&self, type_: &str) -> Result<Vec<IndexRecord>, StoreError> {
    let folder = default_type_folder(type_);
    let sidecar = self.root.join(&folder).join(TYPE_INDEX_FILE);
    let mut records = if sidecar.is_file() {
        self.read_type_index(&sidecar)?
    } else {
        let layer = layer_of_folder(&folder);
        self.read_all_type_indexes_in(layer)?
    };
    records.retain(|record| record.type_ == type_);
    Ok(records)
}
pub fn find_by_where(&self, key: &str, value: &str) -> Result<Vec<IndexRecord>, StoreError> {
self.find_by_where_in(key, value, None)
}
/// Returns the index records whose field `key` matches `value`, restricted
/// to `layer` when one is given and spanning the whole store otherwise.
pub fn find_by_where_in(
    &self,
    key: &str,
    value: &str,
    layer: Option<Layer>,
) -> Result<Vec<IndexRecord>, StoreError> {
    let mut matches = self.read_all_type_indexes_in(layer)?;
    matches.retain(|record| record_matches_field(record, key, value));
    Ok(matches)
}
/// Returns every record from the type-index sidecars of `layer`, or of the
/// whole store when `layer` is `None`.
pub fn sidecar_records(&self, layer: Option<Layer>) -> Result<Vec<IndexRecord>, StoreError> {
    let records = self.read_all_type_indexes_in(layer)?;
    Ok(records)
}
/// Parses one `index.jsonl` sidecar into records.
///
/// Blank lines are skipped. When several lines share a `path`, the last one
/// wins. Records are returned in ascending `path` order.
///
/// # Errors
/// Returns [`StoreError::BadTypeIndex`] if the file cannot be read, or if a
/// line is not a valid record (the message carries the 1-based line number).
pub fn read_type_index(&self, index_jsonl: &Path) -> Result<Vec<IndexRecord>, StoreError> {
    let bad_index = |message: String| StoreError::BadTypeIndex {
        path: index_jsonl.to_path_buf(),
        message,
    };
    let text = std::fs::read_to_string(index_jsonl).map_err(|e| bad_index(e.to_string()))?;
    let mut latest: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
    let numbered_lines = text
        .lines()
        .enumerate()
        .map(|(idx, raw)| (idx + 1, raw.trim()))
        .filter(|(_, json)| !json.is_empty());
    for (line_no, json) in numbered_lines {
        let record: IndexRecord = serde_json::from_str(json)
            .map_err(|e| bad_index(format!("line {}: {e}", line_no)))?;
        latest.insert(record.path.clone(), record);
    }
    Ok(latest.into_values().collect())
}
/// Joins a store-relative path onto the store root.
pub fn abs_path(&self, store_relative: &Path) -> PathBuf {
    let mut abs = self.root.clone();
    abs.push(store_relative);
    abs
}
/// Strips the store root from `abs`, or returns `None` when `abs` does not
/// start with the root.
pub fn rel_path(&self, abs: &Path) -> Option<PathBuf> {
    match abs.strip_prefix(&self.root) {
        Ok(rel) => Some(rel.to_path_buf()),
        Err(_) => None,
    }
}
/// Resolves `folder` against the store root: an absolute `folder` is
/// returned as-is, a relative one is joined onto the root.
fn resolve_under_root(&self, folder: &Path) -> PathBuf {
    // `Path::join` already replaces the base when handed an absolute path,
    // so a single join covers both cases.
    self.root.join(folder)
}
/// Collects the content `.md` files under `root` as sorted store-relative
/// paths.
///
/// Only regular files with an `md` extension count. Metadata basenames
/// (`NON_CONTENT_BASENAMES`) are skipped, as are paths outside the store
/// root.
///
/// # Errors
/// Returns [`StoreError::Search`] if the directory walk reports an error.
fn walk_content_md(&self, root: &Path) -> Result<Vec<PathBuf>, StoreError> {
    let mut found = Vec::new();
    for step in self.md_walker(root).build() {
        let entry = match step {
            Ok(entry) => entry,
            Err(e) => {
                return Err(StoreError::Search {
                    root: root.to_path_buf(),
                    message: e.to_string(),
                })
            }
        };
        let path = entry.path();
        let is_content = is_file_entry(&entry)
            && has_md_extension(path)
            && !is_non_content_basename(path);
        if is_content {
            // `rel_path` is `None` for paths outside the store root.
            found.extend(self.rel_path(path));
        }
    }
    found.sort();
    Ok(found)
}
/// Collects every `.md` file under the store root as sorted store-relative
/// paths, excluding anything whose first path component is `log`.
///
/// Unlike `walk_content_md`, metadata basenames such as `index.md` are kept.
///
/// # Errors
/// Returns [`StoreError::Search`] if the directory walk reports an error.
fn walk_all_md(&self) -> Result<Vec<PathBuf>, StoreError> {
    let mut found = Vec::new();
    for step in self.md_walker(&self.root).build() {
        let entry = match step {
            Ok(entry) => entry,
            Err(e) => {
                return Err(StoreError::Search {
                    root: self.root.clone(),
                    message: e.to_string(),
                })
            }
        };
        let path = entry.path();
        let wanted =
            is_file_entry(&entry) && has_md_extension(path) && !self.is_in_log_dir(path);
        if wanted {
            found.extend(self.rel_path(path));
        }
    }
    found.sort();
    Ok(found)
}
/// Reads every type-index sidecar found in `layer` (or in the whole store
/// when `None`) and concatenates their records in sidecar-path order.
fn read_all_type_indexes_in(
    &self,
    layer: Option<Layer>,
) -> Result<Vec<IndexRecord>, StoreError> {
    let mut records = Vec::new();
    for sidecar in self.find_type_index_files_in(layer)? {
        let abs = self.abs_path(&sidecar);
        let mut parsed = self.read_type_index(&abs)?;
        records.append(&mut parsed);
    }
    Ok(records)
}
/// Locates every `index.jsonl` sidecar under `layer` (or under the store
/// root when `None`), as sorted store-relative paths.
///
/// Sidecars under the top-level `log` directory are ignored. A missing walk
/// root yields an empty list.
///
/// # Errors
/// Returns [`StoreError::Search`] if the directory walk reports an error.
fn find_type_index_files_in(&self, layer: Option<Layer>) -> Result<Vec<PathBuf>, StoreError> {
    let walk_root = layer.map_or_else(
        || self.root.clone(),
        |l| self.root.join(l.dir_name()),
    );
    if !walk_root.is_dir() {
        return Ok(Vec::new());
    }
    let mut sidecars = Vec::new();
    // `md_walker` applies the same filter settings this walk needs.
    for step in self.md_walker(&walk_root).build() {
        let entry = step.map_err(|e| StoreError::Search {
            root: walk_root.clone(),
            message: e.to_string(),
        })?;
        let path = entry.path();
        let is_sidecar = is_file_entry(&entry)
            && path.file_name().and_then(|n| n.to_str()) == Some(TYPE_INDEX_FILE)
            && !self.is_in_log_dir(path);
        if is_sidecar {
            sidecars.extend(self.rel_path(path));
        }
    }
    sidecars.sort();
    Ok(sidecars)
}
/// Builds the directory walker shared by the store's traversals.
///
/// The walker's standard filters are switched off and the hidden-entry
/// filter is then switched back on. NOTE(review): per the `ignore` crate
/// docs this should disable ignore-file handling while still skipping
/// dot-files — confirm against the crate version in use.
fn md_walker(&self, root: &Path) -> WalkBuilder {
    let mut walker = WalkBuilder::new(root);
    walker.standard_filters(false);
    walker.hidden(true);
    walker
}
/// Reports whether `abs` lies under the store's top-level `log` directory,
/// i.e. its first store-relative component is exactly `log`. Paths outside
/// the store are never in the log directory.
fn is_in_log_dir(&self, abs: &Path) -> bool {
    self.rel_path(abs).map_or(false, |rel| {
        rel.components()
            .next()
            .map_or(false, |first| first.as_os_str() == "log")
    })
}
fn read_updated(&self, abs: &Path) -> Option<DateTime<FixedOffset>> {
let text = std::fs::read_to_string(abs).ok()?;
let yaml = frontmatter_block(&text)?;
let value: serde_yml::Value = serde_yml::from_str(yaml).ok()?;
let raw = value.get("updated")?;
value_to_datetime(raw)
}
fn primary_shard_segment(&self, type_: &str, fm: &Frontmatter) -> Option<(String, String)> {
if let Some(field) = primary_date_field(type_) {
if let Some(v) = fm.extra.get(field) {
if let Some(seg) = value_to_year_month(v) {
return Some(seg);
}
}
}
fm.created
.map(|dt| (format!("{:04}", dt.year()), format!("{:02}", dt.month())))
}
}
/// Reports whether a walk entry is a regular file; an entry with no known
/// file type counts as "not a file".
fn is_file_entry(entry: &ignore::DirEntry) -> bool {
    matches!(entry.file_type(), Some(kind) if kind.is_file())
}
/// Reports whether `path` has the exact (case-sensitive) extension `md`.
fn has_md_extension(path: &Path) -> bool {
    path.extension().map_or(false, |ext| ext == "md")
}
/// Reports whether the file name of `path` is one of the store's metadata
/// basenames. A path with no UTF-8 file name is treated as content.
fn is_non_content_basename(path: &Path) -> bool {
    path.file_name()
        .and_then(|name| name.to_str())
        .map_or(false, |name| NON_CONTENT_BASENAMES.contains(&name))
}
/// Returns `name` with `.md` appended, unless it already ends in `.md`.
fn ensure_md_extension(name: &str) -> String {
    let mut filename = String::from(name);
    if !filename.ends_with(".md") {
        filename.push_str(".md");
    }
    filename
}
/// Renders `target` as wiki-link text.
///
/// Only normal, UTF-8 path components are kept (root, prefix, `.` and `..`
/// are dropped). They are joined with `/`, and one trailing `.md` is
/// removed.
fn path_to_link_str(target: &Path) -> String {
    use std::path::Component;

    let segments: Vec<&str> = target
        .components()
        .filter_map(|component| match component {
            Component::Normal(os) => os.to_str(),
            _ => None,
        })
        .collect();
    let mut link = segments.join("/");
    let stem_len = link.strip_suffix(".md").map(str::len);
    if let Some(len) = stem_len {
        link.truncate(len);
    }
    link
}
/// Maps a type name to its default store-relative folder. Types without an
/// explicit mapping go to `records/<type>`.
fn default_type_folder(type_: &str) -> PathBuf {
    const KNOWN_FOLDERS: [(&str, &str); 10] = [
        ("email", "sources/emails"),
        ("transcript", "sources/transcripts"),
        ("pdf-source", "sources/docs"),
        ("contact", "records/contacts"),
        ("company", "records/companies"),
        ("expense", "records/expenses"),
        ("meeting", "records/meetings"),
        ("decision", "records/decisions"),
        ("invoice", "records/invoices"),
        ("wiki-page", "wiki/topics"),
    ];
    KNOWN_FOLDERS
        .iter()
        .find(|(known, _)| *known == type_)
        .map(|(_, folder)| PathBuf::from(*folder))
        .unwrap_or_else(|| PathBuf::from("records").join(type_))
}
/// Returns the layer that holds files of `type_`, taken from the first
/// component of the type's default folder; falls back to `Layer::Records`.
pub fn layer_for_type(type_: &str) -> Layer {
    let folder = default_type_folder(type_);
    match layer_of_folder(&folder) {
        Some(layer) => layer,
        None => Layer::Records,
    }
}
/// Returns the layer named by the first component of `folder`, or `None`
/// when that component is missing, not UTF-8, or not a layer directory name.
fn layer_of_folder(folder: &Path) -> Option<Layer> {
    folder
        .components()
        .next()
        .and_then(|first| first.as_os_str().to_str())
        .and_then(Layer::from_dir_name)
}
/// Infers a type name from a store-relative path shaped like
/// `<layer>/<folder>/<file…>`.
///
/// Returns `None` unless the path has at least three UTF-8 components and
/// starts with `sources`, `records` or `wiki`. Everything under `wiki` is a
/// `wiki-page`. Known `sources`/`records` folders map to their singular type
/// name; any other folder name is returned unchanged.
pub fn infer_type_from_path(rel: &Path) -> Option<String> {
    let head: Vec<&str> = rel
        .components()
        .filter_map(|component| component.as_os_str().to_str())
        .take(3)
        .collect();
    // The third component only has to exist; its value is irrelevant.
    let (layer, folder) = match head.as_slice() {
        [layer, folder, _] => (*layer, *folder),
        _ => return None,
    };
    let inferred = match layer {
        "wiki" => "wiki-page",
        "sources" => match folder {
            "emails" => "email",
            "transcripts" => "transcript",
            "docs" => "pdf-source",
            other => other,
        },
        "records" => match folder {
            "contacts" => "contact",
            "companies" => "company",
            "expenses" => "expense",
            "meetings" => "meeting",
            "decisions" => "decision",
            "invoices" => "invoice",
            other => other,
        },
        _ => return None,
    };
    Some(inferred.to_owned())
}
/// Names the frontmatter field that holds the primary date for `type_`, or
/// `None` when the type has no such field.
fn primary_date_field(type_: &str) -> Option<&'static str> {
    let field = match type_ {
        "email" | "expense" | "invoice" | "meeting" => "date",
        "transcript" => "recorded_at",
        "pdf-source" => "received_at",
        _ => return None,
    };
    Some(field)
}
/// Parses a scalar YAML value as an RFC 3339 timestamp, ignoring surrounding
/// whitespace. Non-scalar or unparseable values yield `None`.
fn value_to_datetime(value: &serde_yml::Value) -> Option<DateTime<FixedOffset>> {
    yaml_scalar_string(value).and_then(|raw| DateTime::parse_from_rfc3339(raw.trim()).ok())
}
/// Extracts a `(year, month)` pair from a scalar YAML value whose trimmed
/// text starts with `YYYY-MM`.
fn value_to_year_month(value: &serde_yml::Value) -> Option<(String, String)> {
    yaml_scalar_string(value).and_then(|raw| year_month_from_str(raw.trim()))
}
/// Extracts `("YYYY", "MM")` from a string that begins with `YYYY-MM`.
///
/// The first seven bytes must be four ASCII digits, a hyphen, and two ASCII
/// digits forming a month in `01..=12`. Anything after those seven bytes is
/// ignored.
fn year_month_from_str(s: &str) -> Option<(String, String)> {
    let head = s.as_bytes().get(..7)?;
    let (year, rest) = head.split_at(4);
    let month = &rest[1..];
    let well_formed = rest[0] == b'-'
        && year.iter().all(u8::is_ascii_digit)
        && month.iter().all(u8::is_ascii_digit);
    if !well_formed {
        return None;
    }
    let month_number = (month[0] - b'0') * 10 + (month[1] - b'0');
    if month_number == 0 || month_number > 12 {
        return None;
    }
    // The first seven bytes are ASCII, so these slices fall on char boundaries.
    Some((s[..4].to_owned(), s[5..7].to_owned()))
}
/// Renders a scalar YAML value as text.
///
/// A value that `as_str` accepts is returned verbatim. Null, mappings and
/// sequences yield `None`. Any other value is serialized with
/// `serde_yml::to_string` and trimmed.
fn yaml_scalar_string(value: &serde_yml::Value) -> Option<String> {
    if let Some(text) = value.as_str() {
        return Some(text.to_owned());
    }
    let has_no_scalar_text = matches!(
        value,
        serde_yml::Value::Null | serde_yml::Value::Mapping(_) | serde_yml::Value::Sequence(_)
    );
    if has_no_scalar_text {
        return None;
    }
    serde_yml::to_string(value)
        .ok()
        .map(|rendered| rendered.trim().to_owned())
}
/// Returns the text between the opening and closing `---` fence lines of a
/// document's frontmatter.
///
/// A leading byte-order mark is ignored. The first line must be exactly
/// `---`, and the block ends at the next line that is exactly `---`;
/// trailing `\r` characters on a line are tolerated. The returned slice keeps
/// the line terminators of the block's lines. Returns `None` when either
/// fence is missing.
fn frontmatter_block(text: &str) -> Option<&str> {
    let is_fence = |line: &str| line.trim_end_matches('\r') == "---";

    let body = text.strip_prefix('\u{feff}').unwrap_or(text);
    let (opening, block_onwards) = split_first_line(body);
    if !is_fence(opening) {
        return None;
    }
    // `consumed` is the byte offset of the current line within `block_onwards`.
    let mut consumed = 0usize;
    let mut remaining = block_onwards;
    while !remaining.is_empty() {
        let (line, tail) = split_first_line(remaining);
        if is_fence(line) {
            return Some(&block_onwards[..consumed]);
        }
        consumed += line.len() + 1;
        remaining = tail;
    }
    // Ran out of input before a closing fence.
    None
}

/// Splits `s` at its first `\n`, returning the line (without the `\n`) and
/// the rest. When there is no `\n`, the rest is empty.
fn split_first_line(s: &str) -> (&str, &str) {
    s.split_once('\n').unwrap_or((s, ""))
}
/// Tests whether `record`'s field `key` matches the query string `value`.
///
/// Built-in keys compare against the corresponding record field: `tags` and
/// `links` match when any element equals `value`, and `created`/`updated`
/// compare as RFC 3339 instants. Any other key is looked up in
/// `record.fields`, and a missing field never matches.
fn record_matches_field(record: &IndexRecord, key: &str, value: &str) -> bool {
    match key {
        "path" => record.path.to_string_lossy() == value,
        "type" => record.type_ == value,
        "summary" => record.summary == value,
        "tags" => record.tags.iter().any(|tag| tag == value),
        "links" => record.links.iter().any(|link| link == value),
        "created" => timestamp_matches(record.created, value),
        "updated" => timestamp_matches(record.updated, value),
        custom => match record.fields.get(custom) {
            Some(stored) => json_value_matches(stored, value),
            None => false,
        },
    }
}
/// Compares a stored timestamp with an RFC 3339 query string. A missing
/// stored value or an unparseable query never matches.
fn timestamp_matches(stored: Option<DateTime<FixedOffset>>, value: &str) -> bool {
    let queried = DateTime::parse_from_rfc3339(value).ok();
    stored
        .zip(queried)
        .map_or(false, |(have, want)| have == want)
}
fn json_value_matches(v: &serde_json::Value, value: &str) -> bool {
match v {
serde_json::Value::String(s) => s == value,
serde_json::Value::Bool(b) => b.to_string() == value,
serde_json::Value::Number(n) => n.to_string() == value,
serde_json::Value::Array(items) => items.iter().any(|i| json_value_matches(i, value)),
serde_json::Value::Null => value.is_empty(),
serde_json::Value::Object(_) => false,
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::{tempdir, TempDir};
/// Writes `contents` to `root/rel`, creating missing parent directories, and
/// returns `rel` as a path.
fn write(root: &Path, rel: &str, contents: &str) -> PathBuf {
    let target = root.join(rel);
    let parent = target.parent().unwrap();
    fs::create_dir_all(parent).unwrap();
    fs::write(&target, contents).unwrap();
    PathBuf::from(rel)
}
/// Builds a minimal content document whose `created` and `updated`
/// frontmatter fields are both `updated`.
fn content_md(updated: &str) -> String {
    let document = format!(
        "---\ntype: note\ncreated: {updated}\nupdated: {updated}\nsummary: a note\n---\n\nbody\n",
        updated = updated
    );
    document
}
/// Creates a temporary directory holding only a `DB.md` marker.
fn empty_store() -> TempDir {
    let dir = tempdir().unwrap();
    let marker = dir.path().join("DB.md");
    fs::write(
        marker,
        "---\ntype: db-md\nscope: company\nowner: Test\n---\n\n# Store\n",
    )
    .unwrap();
    dir
}
/// Opens the fixture directory as a store, panicking if it is not one.
fn open(dir: &TempDir) -> Store {
    let root = dir.path();
    Store::open(root).expect("fixture should be a valid store")
}
/// Renders paths as strings with `/` separators so expectations are
/// platform-independent.
fn rels(paths: &[PathBuf]) -> Vec<String> {
    let mut rendered = Vec::with_capacity(paths.len());
    for path in paths {
        rendered.push(path.to_string_lossy().replace('\\', "/"));
    }
    rendered
}
/// `dir_name` and `from_dir_name` round-trip for every layer; unknown or
/// differently-cased names are rejected.
#[test]
fn layer_dir_name_and_parse_are_inverse() {
    for layer in Layer::all() {
        assert_eq!(Layer::from_dir_name(layer.dir_name()), Some(layer));
    }
    let expected_names = [
        (Layer::Sources, "sources"),
        (Layer::Records, "records"),
        (Layer::Wiki, "wiki"),
    ];
    for (layer, name) in expected_names {
        assert_eq!(layer.dir_name(), name);
    }
    for unknown in ["log", "Sources"] {
        assert_eq!(Layer::from_dir_name(unknown), None);
    }
}
/// Sorting layers yields Sources, Records, Wiki.
#[test]
fn layer_order_is_canonical() {
    let mut shuffled = vec![Layer::Wiki, Layer::Sources, Layer::Records];
    shuffled.sort();
    assert_eq!(shuffled, [Layer::Sources, Layer::Records, Layer::Wiki]);
}
/// A directory becomes a store once it contains a `DB.md` file.
#[test]
fn is_store_true_only_with_uppercase_marker() {
    let dir = tempdir().unwrap();
    let root = dir.path();
    assert!(!Store::is_db_md_store(root), "no marker → not a store");
    fs::write(root.join("DB.md"), "---\ntype: db-md\n---\n").unwrap();
    assert!(Store::is_db_md_store(root), "uppercase DB.md → store");
}
/// A lowercase `db.md` is not the marker, so the directory neither probes
/// as a store nor opens.
#[test]
fn is_store_false_for_lowercase_db_md() {
    let dir = tempdir().unwrap();
    let root = dir.path();
    fs::write(root.join("db.md"), "---\ntype: db-md\n---\n").unwrap();
    let detected = Store::is_db_md_store(root);
    assert!(
        !detected,
        "lowercase db.md must NOT be treated as a store marker"
    );
    assert!(Store::open(root).is_err());
}
/// A directory named `DB.md` does not count as the marker file.
#[test]
fn is_store_false_when_db_md_is_a_directory() {
    let dir = tempdir().unwrap();
    let root = dir.path();
    fs::create_dir(root.join("DB.md")).unwrap();
    let detected = Store::is_db_md_store(root);
    assert!(
        !detected,
        "a directory named DB.md is not the file marker"
    );
}
/// Opening a directory without a marker fails, and the error carries the
/// probed path.
#[test]
fn open_rejects_non_store_with_path() {
    let dir = tempdir().unwrap();
    let root = dir.path();
    let rejection = Store::open(root).unwrap_err();
    assert_eq!(rejection.path, root);
}
/// Opening a valid store keeps the root and exposes the frozen pages listed
/// in `DB.md`.
#[test]
fn open_succeeds_and_parses_config() {
    let dir = tempdir().unwrap();
    let root = dir.path();
    fs::write(
        root.join("DB.md"),
        "---\ntype: db-md\nscope: company\nowner: Test\n---\n\n# Store\n\n\
         ## Policies\n\n### Frozen pages\n- records/decisions/q1.md\n",
    )
    .unwrap();
    let store = Store::open(root).unwrap();
    assert_eq!(store.root, root);
    let frozen = Path::new("records/decisions/q1.md");
    let lists_frozen_page = store.config.frozen_pages.iter().any(|p| p == frozen);
    assert!(
        lists_frozen_page,
        "open() must surface DB.md ## Policies, got {:?}",
        store.config.frozen_pages
    );
}
/// `walk` returns content files from all three layers in sorted order.
/// Index and log files, hidden directories and non-markdown files are left
/// out.
#[test]
fn walk_collects_content_across_layers_skipping_meta_and_log() {
    let dir = empty_store();
    let root = dir.path();
    let content = [
        ("sources/emails/2026/05/a.md", "2026-05-01T00:00:00Z"),
        ("records/contacts/sarah.md", "2026-05-02T00:00:00Z"),
        ("wiki/people/sarah.md", "2026-05-03T00:00:00Z"),
    ];
    for (rel, updated) in content {
        write(root, rel, &content_md(updated));
    }
    // Metadata files at several depths, plus the log directory.
    write(root, "sources/emails/index.md", "---\ntype: index\n---\n");
    write(root, "index.md", "---\ntype: index\n---\n");
    write(root, "log.md", "---\ntype: log\n---\n");
    write(root, "log/2026-04.md", "---\ntype: log\n---\n");
    // Content-shaped file inside a hidden directory.
    write(
        root,
        "sources/.hidden/secret.md",
        &content_md("2026-05-09T00:00:00Z"),
    );
    // Not markdown.
    write(root, "records/contacts/notes.txt", "not markdown");

    let store = open(&dir);
    let got = rels(&store.walk().unwrap());
    assert_eq!(
        got,
        [
            "records/contacts/sarah.md",
            "sources/emails/2026/05/a.md",
            "wiki/people/sarah.md",
        ]
    );
}
/// `walk_layer` only returns files of the requested layer, and an absent
/// layer directory yields nothing.
#[test]
fn walk_layer_is_scoped() {
    let dir = empty_store();
    let root = dir.path();
    let email = "sources/emails/2026/05/a.md";
    let contact = "records/contacts/sarah.md";
    write(root, email, &content_md("2026-05-01T00:00:00Z"));
    write(root, contact, &content_md("2026-05-02T00:00:00Z"));

    let store = open(&dir);
    let sources = rels(&store.walk_layer(Layer::Sources).unwrap());
    assert_eq!(sources, [email]);
    let records = rels(&store.walk_layer(Layer::Records).unwrap());
    assert_eq!(records, [contact]);
    assert!(store.walk_layer(Layer::Wiki).unwrap().is_empty());
}
/// `walk_type_folder` descends into shard folders and skips the folder's
/// `index.md`. It ignores sibling type folders and gives the same result for
/// relative and absolute folder paths.
#[test]
fn walk_type_folder_recurses_shards_and_accepts_abs_or_rel() {
    let dir = empty_store();
    let root = dir.path();
    let emails = [
        ("sources/emails/2026/05/a.md", "2026-05-01T00:00:00Z"),
        ("sources/emails/2026/06/b.md", "2026-06-01T00:00:00Z"),
    ];
    for (rel, updated) in emails {
        write(root, rel, &content_md(updated));
    }
    write(root, "sources/emails/index.md", "---\ntype: index\n---\n");
    write(
        root,
        "sources/docs/2026/05/c.md",
        &content_md("2026-05-04T00:00:00Z"),
    );

    let store = open(&dir);
    let expected = ["sources/emails/2026/05/a.md", "sources/emails/2026/06/b.md"];
    let via_relative = store.walk_type_folder(Path::new("sources/emails")).unwrap();
    assert_eq!(rels(&via_relative), expected);
    let via_absolute = store
        .walk_type_folder(&root.join("sources/emails"))
        .unwrap();
    assert_eq!(rels(&via_absolute), expected);
}
/// `recent_in_type_folder` orders by `updated` descending and breaks ties by
/// ascending path. It returns at most `n` entries.
#[test]
fn recent_orders_by_updated_desc_then_path_and_caps() {
    let dir = empty_store();
    let root = dir.path();
    let fixtures = [
        ("records/meetings/2026/05/c.md", "2026-05-03T00:00:00Z"),
        ("records/meetings/2026/05/a.md", "2026-05-02T00:00:00Z"),
        ("records/meetings/2026/05/b.md", "2026-05-02T00:00:00Z"),
        ("records/meetings/2026/04/z.md", "2026-04-01T00:00:00Z"),
    ];
    for (rel, updated) in fixtures {
        write(root, rel, &content_md(updated));
    }

    let store = open(&dir);
    let folder = Path::new("records/meetings");

    let all = rels(&store.recent_in_type_folder(folder, 10).unwrap());
    assert_eq!(
        all,
        [
            // Newest first.
            "records/meetings/2026/05/c.md",
            // Same `updated`: ascending path decides.
            "records/meetings/2026/05/a.md",
            "records/meetings/2026/05/b.md",
            // Oldest last.
            "records/meetings/2026/04/z.md",
        ]
    );

    let top2 = rels(&store.recent_in_type_folder(folder, 2).unwrap());
    assert_eq!(
        top2,
        [
            "records/meetings/2026/05/c.md",
            "records/meetings/2026/05/a.md",
        ]
    );
}
/// A file without an `updated` field sorts after every dated file.
#[test]
fn recent_sorts_undated_files_last() {
    let dir = empty_store();
    let root = dir.path();
    let dated = "records/contacts/dated.md";
    let undated = "records/contacts/undated.md";
    write(root, dated, &content_md("2026-05-01T00:00:00Z"));
    write(root, undated, "---\ntype: contact\nsummary: x\n---\nbody\n");

    let store = open(&dir);
    let recent = store
        .recent_in_type_folder(Path::new("records/contacts"), 10)
        .unwrap();
    assert_eq!(
        rels(&recent),
        [dated, undated],
        "a file with a real `updated` must outrank one with none"
    );
}
/// Pins which types shard and which stay flat.
#[test]
fn type_shards_classification() {
    let dir = empty_store();
    let store = open(&dir);

    let sharded = [
        "email",
        "transcript",
        "pdf-source",
        "expense",
        "invoice",
        "meeting",
        "order",
        "ticket",
        "transaction",
    ];
    let flat = [
        "contact",
        "company",
        "decision",
        "wiki-page",
        "index",
        "log",
        "db-md",
        "proposal",
    ];

    for t in sharded {
        assert!(store.type_shards(t), "{t} should shard");
    }
    for t in flat {
        assert!(!store.type_shards(t), "{t} should stay flat");
    }
}
/// Builds a default frontmatter with one extra string field set.
fn fm_with_extra(key: &str, value: &str) -> Frontmatter {
    let mut frontmatter = Frontmatter::default();
    let yaml_value = serde_yml::Value::String(value.to_string());
    frontmatter.extra.insert(key.to_string(), yaml_value);
    frontmatter
}
fn fm_with_created(rfc3339: &str) -> Frontmatter {
Frontmatter {
created: Some(DateTime::parse_from_rfc3339(rfc3339).unwrap()),
..Default::default()
}
}
/// The shard segment is taken from each type's own primary date field.
#[test]
fn shard_path_uses_primary_date_field_per_type() {
    let dir = empty_store();
    let store = open(&dir);
    let cases = [
        (
            "expense",
            fm_with_extra("date", "2026-05-22"),
            "lunch",
            "records/expenses/2026/05/lunch.md",
        ),
        (
            "email",
            fm_with_extra("date", "2026-11-02T09:00:00-07:00"),
            "e1",
            "sources/emails/2026/11/e1.md",
        ),
        (
            "transcript",
            fm_with_extra("recorded_at", "2025-01-15T12:00:00Z"),
            "t1",
            "sources/transcripts/2025/01/t1.md",
        ),
    ];
    for (type_, frontmatter, name, expected) in cases {
        let path = store.shard_path_for(type_, &frontmatter, name).unwrap();
        assert_eq!(path, PathBuf::from(expected));
    }
}
/// Without a primary date field, the shard segment comes from `created`.
#[test]
fn shard_path_falls_back_to_created() {
    let dir = empty_store();
    let store = open(&dir);
    let frontmatter = fm_with_created("2024-07-09T08:30:00-04:00");
    let path = store
        .shard_path_for("meeting", &frontmatter, "sync")
        .unwrap();
    assert_eq!(path, PathBuf::from("records/meetings/2024/07/sync.md"));
}
/// When both are present, the primary date field beats `created`.
#[test]
fn shard_path_primary_field_wins_over_created() {
    let dir = empty_store();
    let store = open(&dir);
    let mut frontmatter = fm_with_created("2020-01-01T00:00:00Z");
    let date = serde_yml::Value::String("2026-05-22".into());
    frontmatter.extra.insert("date".into(), date);
    let path = store.shard_path_for("expense", &frontmatter, "x").unwrap();
    assert_eq!(path, PathBuf::from("records/expenses/2026/05/x.md"));
}
/// Flat types land directly in their type folder, even when a date is
/// available.
#[test]
fn shard_path_flat_types_have_no_shard_segment() {
    let dir = empty_store();
    let store = open(&dir);

    let dated = fm_with_created("2026-05-22T00:00:00Z");
    let contact = store
        .shard_path_for("contact", &dated, "sarah-chen")
        .unwrap();
    assert_eq!(contact, PathBuf::from("records/contacts/sarah-chen.md"));

    let undated = Frontmatter::default();
    let page = store
        .shard_path_for("wiki-page", &undated, "renewal-theme")
        .unwrap();
    assert_eq!(page, PathBuf::from("wiki/topics/renewal-theme.md"));
}
/// A wiki-page path has exactly three components: layer, type folder, file.
#[test]
fn shard_path_wiki_page_is_indexable_three_component_path() {
    let dir = empty_store();
    let store = open(&dir);
    let p = store
        .shard_path_for("wiki-page", &Frontmatter::default(), "renewal-theme")
        .unwrap();
    let parts: Vec<&str> = p.iter().filter_map(|part| part.to_str()).collect();
    assert_eq!(
        parts.len(),
        3,
        "wiki-page path must be <layer>/<type-folder>/<file>, got {p:?}"
    );
    let (layer, folder, file) = (parts[0], parts[1], parts[2]);
    assert_eq!(layer, "wiki", "first component must be the wiki layer");
    assert!(
        !folder.is_empty() && folder != "renewal-theme.md",
        "second component must be a real type-folder, not the file: {p:?}"
    );
    assert!(
        file.ends_with(".md"),
        "third component must be the .md file: {p:?}"
    );
}
/// A name with or without `.md` resolves to the same `.md` path.
#[test]
fn shard_path_preserves_and_adds_md_extension() {
    let dir = empty_store();
    let store = open(&dir);
    let frontmatter = Frontmatter::default();
    let expected = PathBuf::from("records/contacts/sarah.md");
    for name in ["sarah.md", "sarah"] {
        let path = store.shard_path_for("contact", &frontmatter, name).unwrap();
        assert_eq!(path, expected);
    }
}
/// A sharding type with no usable date fails with `NoShardDate`, which
/// reports the unsharded path.
#[test]
fn shard_path_errors_when_sharding_type_has_no_date() {
    let dir = empty_store();
    let store = open(&dir);
    let failure = store
        .shard_path_for("expense", &Frontmatter::default(), "mystery")
        .unwrap_err();
    if let StoreError::NoShardDate { file } = failure {
        assert_eq!(file, PathBuf::from("records/expenses/mystery.md"));
    } else {
        panic!("expected NoShardDate, got {failure:?}");
    }
}
/// `find_links_to` accepts the plain, `|label` and `.md` link spellings,
/// including links found in an `index.md`. Files with no link, a bare
/// basename link, or a link to a longer sibling name are not returned.
#[test]
fn find_links_to_matches_all_accepted_spellings() {
let dir = empty_store();
let root = dir.path();
let target = "records/contacts/sarah-chen";
// Plain link.
write(
root,
"wiki/people/sarah.md",
&format!("---\ntype: wiki-page\nsummary: s\n---\nSee [[{target}]].\n"),
);
// Link with a display label.
write(
root,
"records/meetings/2026/05/m.md",
&format!("---\ntype: meeting\nsummary: s\n---\nWith [[{target}|Sarah]].\n"),
);
// Link that spells out the `.md` extension.
write(
root,
"wiki/themes/t.md",
&format!("---\ntype: wiki-page\nsummary: s\n---\n[[{target}.md]]\n"),
);
// Link inside an index file (still searched for backlinks).
write(
root,
"records/contacts/index.md",
&format!("---\ntype: index\n---\n- [[{target}]] — Sarah\n"),
);
// No links at all.
write(
root,
"wiki/people/elena.md",
"---\ntype: wiki-page\nsummary: s\n---\nNo links here.\n",
);
// Bare basename, not the full store-relative path.
write(
root,
"wiki/people/bob.md",
"---\ntype: wiki-page\nsummary: s\n---\n[[sarah-chen]]\n",
);
// Longer sibling name that merely starts with the target.
write(
root,
"wiki/people/jr.md",
&format!("---\ntype: wiki-page\nsummary: s\n---\n[[{target}-jr]]\n"),
);
let store = open(&dir);
let got = rels(&store.find_links_to(Path::new(target)).unwrap());
assert_eq!(
got,
vec![
"records/contacts/index.md".to_string(),
"records/meetings/2026/05/m.md".to_string(),
"wiki/people/sarah.md".to_string(),
"wiki/themes/t.md".to_string(),
]
);
}
/// A target that is a prefix of another target's name (`sarah` vs
/// `sarah-chen`) only matches links to itself, in both directions.
#[test]
fn find_links_to_distinguishes_sibling_paths() {
let dir = empty_store();
let root = dir.path();
write(
root,
"wiki/a.md",
"---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah]]\n",
);
write(
root,
"wiki/b.md",
"---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
);
let store = open(&dir);
assert_eq!(
rels(
&store
.find_links_to(Path::new("records/contacts/sarah"))
.unwrap()
),
vec!["wiki/a.md".to_string()]
);
assert_eq!(
rels(
&store
.find_links_to(Path::new("records/contacts/sarah-chen"))
.unwrap()
),
vec!["wiki/b.md".to_string()]
);
}
/// `find_links_to_any` returns the sorted, de-duplicated union of linkers
/// over all targets, and agrees with running `find_links_to` per target.
#[test]
fn find_links_to_any_returns_the_union_with_boundary_correctness() {
let dir = empty_store();
let root = dir.path();
// Links only to the first target.
write(
root,
"wiki/links-sarah.md",
"---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
);
// Links only to the second target, with a label.
write(
root,
"wiki/links-acme.md",
"---\ntype: wiki-page\nsummary: s\n---\nDeal with [[records/companies/acme|Acme]].\n",
);
// Links to both targets; must appear once in the result.
write(
root,
"records/meetings/2026/05/m.md",
"---\ntype: meeting\nsummary: s\n---\n[[records/contacts/sarah-chen]] re \
[[records/companies/acme]]\n",
);
// Prefix sibling of the first target; must not match.
write(
root,
"wiki/links-jr.md",
"---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen-jr]]\n",
);
// Links to something else entirely.
write(
root,
"wiki/unrelated.md",
"---\ntype: wiki-page\nsummary: s\n---\n[[wiki/themes/spend]]\n",
);
let store = open(&dir);
let targets = vec![
PathBuf::from("records/contacts/sarah-chen"),
PathBuf::from("records/companies/acme"),
];
let got = rels(&store.find_links_to_any(&targets).unwrap());
assert_eq!(
got,
vec![
"records/meetings/2026/05/m.md".to_string(),
"wiki/links-acme.md".to_string(),
"wiki/links-sarah.md".to_string(),
],
"batch finder must return the deduped union of linkers across all \
targets, excluding the prefix-sibling and the unrelated file"
);
// Cross-check against the single-target finder.
let mut union: std::collections::BTreeSet<PathBuf> = std::collections::BTreeSet::new();
for t in &targets {
for linker in store.find_links_to(t).unwrap() {
union.insert(linker);
}
}
assert_eq!(
rels(&union.into_iter().collect::<Vec<_>>()),
got,
"find_links_to_any must equal the union of per-target find_links_to"
);
}
/// No targets, or only targets that render to empty link text, yield no
/// linkers even though the store contains links.
#[test]
fn find_links_to_any_empty_targets_matches_nothing() {
let dir = empty_store();
let root = dir.path();
write(
root,
"wiki/a.md",
"---\ntype: wiki-page\nsummary: s\n---\n[[records/contacts/sarah-chen]]\n",
);
let store = open(&dir);
assert!(
store.find_links_to_any(&[]).unwrap().is_empty(),
"no targets ⇒ no linkers (an empty pattern must not match every file)"
);
assert!(
store
.find_links_to_any(&[PathBuf::from(""), PathBuf::from("./")])
.unwrap()
.is_empty(),
"targets that render to empty link text contribute no alternation arm"
);
}
/// A sidecar line parses into the built-in record fields, with the
/// remaining keys (`vendor`, `amount`) available through `fields`. Absent
/// `tags` and `links` come back empty.
#[test]
fn read_type_index_parses_records_and_flattens_fields() {
let dir = empty_store();
let root = dir.path();
let jsonl = "\
{\"path\":\"records/expenses/2026/05/a.md\",\"type\":\"expense\",\"summary\":\"lunch\",\"tags\":[\"meals\"],\"links\":[\"records/companies/acme\"],\"created\":\"2026-05-01T00:00:00Z\",\"updated\":\"2026-05-01T00:00:00Z\",\"vendor\":\"acme\",\"amount\":42}
{\"path\":\"records/expenses/2026/05/b.md\",\"type\":\"expense\",\"summary\":\"taxi\",\"created\":null,\"updated\":null,\"vendor\":\"yellow\"}
";
let p = write(root, "records/expenses/index.jsonl", jsonl);
let store = open(&dir);
let recs = store.read_type_index(&store.abs_path(&p)).unwrap();
assert_eq!(recs.len(), 2);
// Records come back in ascending path order, so `a.md` is first.
assert_eq!(recs[0].path, PathBuf::from("records/expenses/2026/05/a.md"));
assert_eq!(recs[0].type_, "expense");
assert_eq!(recs[0].summary, "lunch");
assert_eq!(recs[0].tags, vec!["meals".to_string()]);
assert_eq!(recs[0].links, vec!["records/companies/acme".to_string()]);
assert!(recs[0].created.is_some());
assert_eq!(
recs[0].fields.get("vendor"),
Some(&serde_json::json!("acme"))
);
assert_eq!(recs[0].fields.get("amount"), Some(&serde_json::json!(42)));
assert!(recs[1].tags.is_empty());
assert!(recs[1].links.is_empty());
}
/// Two lines with the same `path` collapse to one record, and the later
/// line's data is the one kept.
#[test]
fn read_type_index_last_write_wins_and_skips_blanks() {
let dir = empty_store();
let root = dir.path();
let jsonl = "\
{\"path\":\"records/contacts/sarah.md\",\"type\":\"contact\",\"summary\":\"old\",\"created\":null,\"updated\":null}
{\"path\":\"records/contacts/sarah.md\",\"type\":\"contact\",\"summary\":\"new\",\"created\":null,\"updated\":null}
";
let p = write(root, "records/contacts/index.jsonl", jsonl);
let store = open(&dir);
let recs = store.read_type_index(&store.abs_path(&p)).unwrap();
assert_eq!(recs.len(), 1, "duplicate path collapses to one record");
assert_eq!(recs[0].summary, "new", "later line must win");
}
/// A sidecar whose only line is not valid JSON must surface as
/// `StoreError::BadTypeIndex`.
#[test]
fn read_type_index_errors_on_malformed_line() {
    let tmp = empty_store();
    let sidecar_rel = write(
        tmp.path(),
        "records/contacts/index.jsonl",
        "{not valid json}\n",
    );
    let store = open(&tmp);
    let sidecar_abs = store.abs_path(&sidecar_rel);
    let failure = store.read_type_index(&sidecar_abs).unwrap_err();
    assert!(matches!(failure, StoreError::BadTypeIndex { .. }));
}
/// Builds one newline-terminated sidecar line for test fixtures.
///
/// `path`, `type_` and `summary` are spliced in verbatim between double
/// quotes (no JSON escaping is performed); `created` and `updated` are always
/// `null`. `extra` is inserted verbatim just before the closing brace, so a
/// non-empty value has to start with a comma.
fn jsonl_line(path: &str, type_: &str, summary: &str, extra: &str) -> String {
    [
        "{\"path\":\"",
        path,
        "\",\"type\":\"",
        type_,
        "\",\"summary\":\"",
        summary,
        "\",\"created\":null,\"updated\":null",
        extra,
        "}\n",
    ]
    .concat()
}
/// With sidecars for both contacts and companies on disk, looking up
/// `contact` must return only contact records. The expected order is
/// Elena then Sarah, although the sidecar lists Sarah first.
#[test]
fn find_by_type_reads_canonical_folder_sidecar() {
    let tmp = empty_store();
    let root = tmp.path();

    let mut contact_rows = jsonl_line("records/contacts/sarah.md", "contact", "Sarah", "");
    contact_rows.push_str(&jsonl_line(
        "records/contacts/elena.md",
        "contact",
        "Elena",
        "",
    ));
    write(root, "records/contacts/index.jsonl", &contact_rows);

    let company_row = jsonl_line("records/companies/acme.md", "company", "Acme", "");
    write(root, "records/companies/index.jsonl", &company_row);

    let store = open(&tmp);
    let contacts = store.find_by_type("contact").unwrap();
    let summaries = contacts
        .iter()
        .map(|rec| rec.summary.clone())
        .collect::<Vec<_>>();
    assert_eq!(summaries, vec!["Elena".to_string(), "Sarah".to_string()]);
    // Nothing from the companies sidecar may leak into the result.
    for rec in contacts.iter() {
        assert!(rec.type_ == "contact");
    }
}
/// `proposal` sidecars exist under both `records/` and `sources/`; the lookup
/// must return the records-layer entry alone and ignore the sources decoy.
#[test]
fn find_by_type_canonical_absent_falls_back_within_the_layer_only() {
    let tmp = empty_store();
    let root = tmp.path();

    let genuine = jsonl_line("records/proposals/p1.md", "proposal", "Q3 proposal", "");
    write(root, "records/proposals/index.jsonl", &genuine);

    // Same type name, different layer: must not be picked up.
    let decoy = jsonl_line(
        "sources/proposals/leak.md",
        "proposal",
        "cross-layer decoy",
        "",
    );
    write(root, "sources/proposals/index.jsonl", &decoy);

    let store = open(&tmp);
    let proposals = store.find_by_type("proposal").unwrap();
    assert_eq!(
        proposals.len(),
        1,
        "only the records-layer proposal, not the sources decoy"
    );
    let only = &proposals[0];
    assert_eq!(only.summary, "Q3 proposal");
    assert_eq!(only.path, PathBuf::from("records/proposals/p1.md"));
}
/// Only a records-layer contacts sidecar exists; looking up `email` must
/// succeed with an empty result.
#[test]
fn find_by_type_canonical_absent_does_not_read_other_layers() {
    let tmp = empty_store();
    let contact_row = jsonl_line("records/contacts/sarah.md", "contact", "Sarah", "");
    write(tmp.path(), "records/contacts/index.jsonl", &contact_row);

    let store = open(&tmp);
    let emails = store.find_by_type("email").unwrap();
    assert!(emails.is_empty());
}
/// Exercises `find_by_where` against three kinds of key: a flattened extra
/// field (`vendor`), a typed scalar column (`type`) and a list column
/// (`tags`). Also checks that a value present in no record yields an empty
/// result.
#[test]
fn find_by_where_matches_typed_columns_and_flat_fields() {
    let tmp = empty_store();
    let root = tmp.path();

    let lunch = jsonl_line(
        "records/expenses/a.md",
        "expense",
        "lunch",
        ",\"vendor\":\"acme\",\"tags\":[\"meals\"]",
    );
    let taxi = jsonl_line(
        "records/expenses/b.md",
        "expense",
        "taxi",
        ",\"vendor\":\"yellow\"",
    );
    write(root, "records/expenses/index.jsonl", &(lunch + &taxi));

    let sarah = jsonl_line(
        "records/contacts/sarah.md",
        "contact",
        "Sarah",
        ",\"tags\":[\"customer\"]",
    );
    write(root, "records/contacts/index.jsonl", &sarah);

    let store = open(&tmp);

    // Flattened extra field.
    let acme_hits = store.find_by_where("vendor", "acme").unwrap();
    assert_eq!(acme_hits.len(), 1);
    assert_eq!(acme_hits[0].path, PathBuf::from("records/expenses/a.md"));

    // Typed scalar column.
    let expense_hits = store.find_by_where("type", "expense").unwrap();
    assert_eq!(expense_hits.len(), 2);

    // List column: the record matches on one of its tags.
    let customer_hits = store.find_by_where("tags", "customer").unwrap();
    assert_eq!(customer_hits.len(), 1);
    assert_eq!(
        customer_hits[0].path,
        PathBuf::from("records/contacts/sarah.md")
    );

    let nobody_hits = store.find_by_where("vendor", "nobody").unwrap();
    assert!(nobody_hits.is_empty());
}
/// Timestamp columns must match on the instant rather than the spelling:
/// `Z` versus `+00:00`, and an offset time versus its UTC equivalent, all hit
/// the same record. A timestamp one second off and a bare date do not match.
#[test]
fn find_by_where_matches_timestamps_across_rfc3339_spellings() {
    let tmp = empty_store();
    let meeting_row = "{\"path\":\"records/meetings/kickoff.md\",\"type\":\"meeting\",\
        \"summary\":\"kickoff\",\"created\":\"2026-05-01T00:00:00Z\",\
        \"updated\":\"2026-05-02T09:30:00-07:00\"}\n";
    write(tmp.path(), "records/meetings/index.jsonl", meeting_row);
    let store = open(&tmp);

    // Same spelling as stored: also pin which record comes back.
    let exact = store
        .find_by_where("created", "2026-05-01T00:00:00Z")
        .unwrap();
    assert_eq!(exact.len(), 1);
    assert_eq!(exact[0].path, PathBuf::from("records/meetings/kickoff.md"));

    // Alternative spellings of the stored instants.
    let equivalent_spellings = [
        ("created", "2026-05-01T00:00:00+00:00"),
        ("updated", "2026-05-02T09:30:00-07:00"),
        ("updated", "2026-05-02T16:30:00Z"),
    ];
    for (key, value) in equivalent_spellings {
        assert_eq!(store.find_by_where(key, value).unwrap().len(), 1);
    }

    // A different instant, and a value that is only a date.
    let non_matching = [
        ("created", "2026-05-01T00:00:01Z"),
        ("created", "2026-05-01"),
    ];
    for (key, value) in non_matching {
        assert!(store.find_by_where(key, value).unwrap().is_empty());
    }
}
/// Plants a valid sidecar under `records/` and a corrupt one under
/// `sources/`. A records-scoped query must succeed, which shows the corrupt
/// file was never parsed. The unscoped and sources-scoped queries must both
/// fail with `BadTypeIndex`.
#[test]
fn find_by_where_in_layer_reads_only_that_layers_sidecars() {
    let tmp = empty_store();
    let root = tmp.path();

    let acme_row = jsonl_line(
        "records/companies/acme.md",
        "company",
        "Acme",
        ",\"domain\":\"acme.com\"",
    );
    write(root, "records/companies/index.jsonl", &acme_row);
    write(
        root,
        "sources/emails/index.jsonl",
        "{ this is not valid json and would error if read }\n",
    );

    let store = open(&tmp);

    let records_hits = store
        .find_by_where_in("domain", "acme.com", Some(Layer::Records))
        .expect("a records-scoped read must not touch the sources sidecar");
    let hit_paths: Vec<_> = records_hits.iter().map(|rec| rec.path.clone()).collect();
    assert_eq!(
        rels(&hit_paths),
        vec!["records/companies/acme.md".to_string()]
    );

    let unscoped = store.find_by_where("domain", "acme.com");
    assert!(
        matches!(unscoped, Err(StoreError::BadTypeIndex { .. })),
        "unscoped read walks every layer and hits the corrupt sidecar"
    );

    let sources_only = store.find_by_where_in("domain", "acme.com", Some(Layer::Sources));
    assert!(matches!(sources_only, Err(StoreError::BadTypeIndex { .. })));
}
/// Scoping a query to a layer with nothing on disk (`wiki` here) must return
/// an empty result rather than an error. The same query scoped to `records`
/// still finds the one matching record.
#[test]
fn find_by_where_in_missing_layer_is_empty_not_an_error() {
    let tmp = empty_store();
    let sarah_row = jsonl_line(
        "records/contacts/sarah.md",
        "contact",
        "Sarah",
        ",\"city\":\"denver\"",
    );
    write(tmp.path(), "records/contacts/index.jsonl", &sarah_row);

    let store = open(&tmp);

    let wiki_hits = store
        .find_by_where_in("city", "denver", Some(Layer::Wiki))
        .expect("missing layer subtree is empty, not an error");
    assert!(wiki_hits.is_empty());

    let records_hits = store
        .find_by_where_in("city", "denver", Some(Layer::Records))
        .unwrap();
    assert_eq!(records_hits.len(), 1);
}
/// `abs_path` joins a relative path onto the store root and leaves an
/// already-absolute path unchanged. `rel_path` inverts it for paths under the
/// root and returns `None` for a path outside it.
#[test]
fn abs_and_rel_path_roundtrip() {
    let tmp = empty_store();
    let store = open(&tmp);
    let relative = Path::new("records/contacts/sarah.md");

    let absolute = store.abs_path(relative);
    assert_eq!(absolute, tmp.path().join(relative));

    let back = store.rel_path(&absolute);
    assert_eq!(back.as_deref(), Some(relative));

    // Feeding an absolute path back in must not change it.
    assert_eq!(store.abs_path(&absolute), absolute);

    let outside = Path::new("/somewhere/else.md");
    assert_eq!(store.rel_path(outside), None);
}
/// Table test: each `(path, expected)` pair names a file under a recognized
/// folder and the type `infer_type_from_path` must report for it. Both
/// `wiki/` rows expect `wiki-page`.
#[test]
fn infer_type_maps_every_recognized_folder_back_to_its_type() {
    let cases = [
        ("sources/emails/x.md", "email"),
        ("sources/transcripts/x.md", "transcript"),
        ("sources/docs/x.md", "pdf-source"),
        ("records/contacts/x.md", "contact"),
        ("records/companies/x.md", "company"),
        ("records/expenses/x.md", "expense"),
        ("records/meetings/x.md", "meeting"),
        ("records/decisions/x.md", "decision"),
        ("records/invoices/x.md", "invoice"),
        ("wiki/topics/x.md", "wiki-page"),
        ("wiki/pricing/x.md", "wiki-page"),
    ];
    for (path, expected) in cases.iter().copied() {
        let inferred = infer_type_from_path(Path::new(path));
        assert_eq!(
            inferred.as_deref(),
            Some(expected),
            "path {path} should infer type {expected}"
        );
    }
}
/// For every recognized type, a file placed in `default_type_folder(type)`
/// must infer back to that same type.
#[test]
fn infer_type_round_trips_with_default_type_folder() {
    let recognized = [
        "email",
        "transcript",
        "pdf-source",
        "contact",
        "company",
        "expense",
        "meeting",
        "decision",
        "invoice",
        "wiki-page",
    ];
    for type_ in recognized.iter().copied() {
        let folder = default_type_folder(type_);
        let inferred = infer_type_from_path(&folder.join("x.md"));
        assert_eq!(
            inferred.as_deref(),
            Some(type_),
            "recognized type {type_} (folder {folder:?}) must round-trip"
        );
    }
}
/// Custom type names map to `records/<name>` and infer back to exactly that
/// name. Plural-looking names such as `tasks` and `okrs` must not be
/// singularized on the way back.
#[test]
fn infer_type_round_trips_custom_types_verbatim_no_singularization() {
    let customs = ["task", "tasks", "playbook", "process", "okrs", "ticket"];
    for custom in customs.iter().copied() {
        let folder = default_type_folder(custom);
        assert_eq!(folder, PathBuf::from("records").join(custom));

        let inferred = infer_type_from_path(&folder.join("x.md"));
        assert_eq!(
            inferred.as_deref(),
            Some(custom),
            "custom type {custom} must round-trip verbatim (no singularization)"
        );
    }

    // Spelled-out regression case for the plural form.
    let plural = infer_type_from_path(Path::new("records/tasks/x.md"));
    assert_eq!(
        plural.as_deref(),
        Some("tasks"),
        "records/tasks must infer `tasks`, not `task`"
    );
}
/// Paths with fewer than three components infer no type, and neither does the
/// three-component `foo/bar/x.md`. A deeper, date-sharded path under
/// `records/expenses` still infers `expense`.
#[test]
fn infer_type_requires_three_component_layer_folder_file_shape() {
    let untyped = [
        "records/x.md",
        "sources/x.md",
        "wiki/x.md",
        "x.md",
        "foo/bar/x.md",
    ];
    for candidate in untyped.iter().copied() {
        assert_eq!(infer_type_from_path(Path::new(candidate)), None);
    }

    let sharded = infer_type_from_path(Path::new("records/expenses/2026/05/x.md"));
    assert_eq!(sharded.as_deref(), Some("expense"),);
}
}