use serde_json::Value;
use crate::index::IndexRecord;
use crate::store::{Layer, Store, StoreError};
/// A record query assembled with the `with_*` builder methods and run with `execute`.
///
/// All predicates that are set are combined conjunctively: a record is returned only
/// when it satisfies every one of them.
#[derive(Debug, Clone, Default)]
pub struct Query {
/// Record type to match, if one was requested.
type_: Option<String>,
/// Layer the results are restricted to, if one was requested.
layer: Option<Layer>,
/// `(key, value)` equality clauses, in the order they were added.
wheres: Vec<(String, String)>,
}
impl Query {
    /// Creates a query with no type, no layer and no where clauses.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the record type to match, replacing any type set earlier.
    pub fn with_type(self, type_: &str) -> Self {
        Self {
            type_: Some(type_.to_owned()),
            ..self
        }
    }

    /// Sets the layer to restrict results to, replacing any layer set earlier.
    pub fn with_layer(self, layer: Layer) -> Self {
        Self {
            layer: Some(layer),
            ..self
        }
    }

    /// Appends a `key == value` clause. Clauses accumulate; repeating a key keeps
    /// every clause rather than overwriting the earlier one.
    pub fn with_where(mut self, key: &str, value: &str) -> Self {
        let clause = (key.to_owned(), value.to_owned());
        self.wheres.push(clause);
        self
    }

    /// Runs the query against `store` and returns the matching records.
    ///
    /// The candidate set is chosen by the most specific predicate available:
    ///
    /// * with a type set, candidates come from `Store::find_by_type`, and the layer
    ///   and every where clause are then checked in memory;
    /// * otherwise, with at least one where clause, the first clause (together with
    ///   the optional layer) is handed to `Store::find_by_where_in`, and the
    ///   remaining predicates are checked in memory;
    /// * with neither a type nor a where clause, the result is empty and the store
    ///   is not consulted (a layer on its own selects nothing).
    ///
    /// # Errors
    ///
    /// Propagates any `StoreError` returned by the store lookup.
    pub fn execute(&self, store: &Store) -> Result<Vec<IndexRecord>, StoreError> {
        match (&self.type_, self.wheres.first()) {
            (Some(wanted), _) => {
                let seed = store.find_by_type(wanted)?;
                // The type is already satisfied by the lookup; no where clause is.
                Ok(self.filter_candidates(seed, true, 0))
            }
            (None, Some((key, value))) => {
                let seed = store.find_by_where_in(key, value, self.layer)?;
                // The first where clause was evaluated by the store itself.
                Ok(self.filter_candidates(seed, false, 1))
            }
            (None, None) => Ok(Vec::new()),
        }
    }

    /// Keeps the candidates that satisfy every predicate not already applied.
    ///
    /// `type_already_applied` suppresses the type check, and the first
    /// `wheres_already_applied` where clauses are skipped. The layer check always
    /// runs when a layer is set. Input order is preserved.
    fn filter_candidates(
        &self,
        candidates: Vec<IndexRecord>,
        type_already_applied: bool,
        wheres_already_applied: usize,
    ) -> Vec<IndexRecord> {
        // `None` here means "no type check needed", either because no type was
        // requested or because the caller already enforced it.
        let required_type = if type_already_applied {
            None
        } else {
            self.type_.as_ref()
        };
        // Clauses still to be checked; empty when the skip count covers them all.
        let pending = self
            .wheres
            .get(wheres_already_applied..)
            .unwrap_or_default();

        let mut kept = candidates;
        kept.retain(|record| {
            required_type.map_or(true, |wanted| record.type_ == *wanted)
                && self
                    .layer
                    .map_or(true, |layer| record_in_layer(record, layer))
                && pending
                    .iter()
                    .all(|(key, value)| record_matches_where(record, key, value))
        });
        kept
    }
}
/// Reports whether `record` lives in `layer`, judged solely by the first component
/// of its path. An empty path, or a first component that is not valid UTF-8, never
/// matches.
fn record_in_layer(record: &IndexRecord, layer: Layer) -> bool {
    let expected = layer_dir_name(layer);
    match record.path.components().next() {
        Some(first) => first.as_os_str().to_str() == Some(expected),
        None => false,
    }
}
/// Returns the directory name that `record_in_layer` compares against the first
/// path component for `layer`.
fn layer_dir_name(layer: Layer) -> &'static str {
    const SOURCES_DIR: &str = "sources";
    const RECORDS_DIR: &str = "records";
    const WIKI_DIR: &str = "wiki";

    match layer {
        Layer::Sources => SOURCES_DIR,
        Layer::Records => RECORDS_DIR,
        Layer::Wiki => WIKI_DIR,
    }
}
/// Evaluates a single `key == value` clause against `record`.
///
/// The built-in columns are handled by name; any other key is looked up in the
/// record's free-form `fields` and compared with `json_value_matches`. A key that
/// is absent from `fields` never matches.
fn record_matches_where(record: &IndexRecord, key: &str, value: &str) -> bool {
    match key {
        // Scalar columns: exact string equality.
        "type" => value == record.type_,
        "summary" => value == record.summary,
        // A path that is not valid UTF-8 cannot equal a `&str` and so never matches.
        "path" => matches!(record.path.to_str(), Some(text) if text == value),
        // List columns: match when any element equals `value`.
        "tags" => record.tags.iter().any(|tag| tag == value),
        "links" => record.links.iter().any(|link| link == value),
        // Timestamps are compared through their RFC 3339 rendering; an unset
        // timestamp never matches.
        "created" => record
            .created
            .map(|stamp| stamp.to_rfc3339())
            .is_some_and(|text| text == value),
        "updated" => record
            .updated
            .map(|stamp| stamp.to_rfc3339())
            .is_some_and(|text| text == value),
        other => match record.fields.get(other) {
            Some(stored) => json_value_matches(stored, value),
            None => false,
        },
    }
}
/// Compares a JSON value with a string predicate.
///
/// Strings compare directly; numbers and booleans compare through their textual
/// form; an array matches when any of its elements matches (recursively). `null`
/// and objects never match.
fn json_value_matches(value: &Value, target: &str) -> bool {
    match value {
        Value::Null | Value::Object(_) => false,
        // Same outcome as comparing against the bool's `to_string()`.
        Value::Bool(flag) => target == if *flag { "true" } else { "false" },
        Value::Number(number) => number.to_string() == target,
        Value::String(text) => text == target,
        Value::Array(elements) => elements
            .iter()
            .any(|element| json_value_matches(element, target)),
    }
}
// Unit tests for `Query`: builder behaviour, end-to-end `execute` runs against
// on-disk `index.jsonl` fixtures, and the private matching helpers.
#[cfg(test)]
mod tests {
use super::*;
use crate::store::Store;
use std::fs;
use std::path::PathBuf;
use tempfile::TempDir;
// Builds an `IndexRecord` with the given path, type and custom fields. The summary
// is derived from the path; tags and links are empty and both timestamps unset.
fn rec(path: &str, type_: &str, fields: &[(&str, Value)]) -> IndexRecord {
IndexRecord {
path: PathBuf::from(path),
type_: type_.to_string(),
summary: format!("summary of {path}"),
tags: Vec::new(),
links: Vec::new(),
created: None,
updated: None,
fields: fields
.iter()
.map(|(k, v)| (k.to_string(), v.clone()))
.collect(),
}
}
// Serializes a record to a single JSON string (one sidecar line); panics on failure.
fn jsonl_line(record: &IndexRecord) -> String {
serde_json::to_string(record).expect("serialize IndexRecord")
}
// Contents written to `DB.md` at the root of every fixture store.
const DB_MD: &str = "---\ntype: db-md\n---\n\n# Test store\n";
// Creates a temporary store: writes `DB.md`, then for each `(folder, records)` pair
// writes `<folder>/index.jsonl` with one serialized record per line, and finally
// opens a `Store` at the root. The `TempDir` is returned alongside the store so the
// caller can keep the directory alive for the duration of the test.
fn store_with_sidecars(sidecars: &[(&str, &[IndexRecord])]) -> (TempDir, Store) {
let dir = TempDir::new().expect("temp dir");
let root = dir.path();
fs::write(root.join("DB.md"), DB_MD).expect("write DB.md");
for (folder, records) in sidecars {
let folder_abs = root.join(folder);
fs::create_dir_all(&folder_abs).expect("create type folder");
let body: String = records
.iter()
.map(|r| format!("{}\n", jsonl_line(r)))
.collect();
fs::write(folder_abs.join("index.jsonl"), body).expect("write index.jsonl");
}
let store = Store::open(root).expect("open store");
(dir, store)
}
// Collects the paths of `records` into an ordered set so assertions ignore result order.
fn paths(records: &[IndexRecord]) -> std::collections::BTreeSet<String> {
records
.iter()
.map(|r| r.path.to_string_lossy().into_owned())
.collect()
}
// Builds the expected-path set from string literals, for comparison with `paths`.
fn path_set(items: &[&str]) -> std::collections::BTreeSet<String> {
items.iter().map(|s| s.to_string()).collect()
}
// The builder stores the type, the layer, and each where clause in call order.
#[test]
fn builder_accumulates_predicates() {
let q = Query::new()
.with_type("contact")
.with_layer(Layer::Records)
.with_where("company", "acme")
.with_where("status", "active");
assert_eq!(q.type_.as_deref(), Some("contact"));
assert_eq!(q.layer, Some(Layer::Records));
assert_eq!(
q.wheres,
vec![
("company".to_string(), "acme".to_string()),
("status".to_string(), "active".to_string()),
],
"each with_where appends a distinct clause"
);
}
// Calling `with_type` / `with_layer` twice keeps only the last value.
#[test]
fn with_type_and_with_layer_replace_rather_than_stack() {
let q = Query::new()
.with_type("contact")
.with_type("company")
.with_layer(Layer::Sources)
.with_layer(Layer::Wiki);
assert_eq!(q.type_.as_deref(), Some("company"));
assert_eq!(q.layer, Some(Layer::Wiki));
}
// Two clauses on the same key are both retained, not deduplicated.
#[test]
fn repeated_with_where_same_key_keeps_both_clauses() {
let q = Query::new()
.with_where("updated", "2026-01-01T00:00:00+00:00")
.with_where("updated", "2026-02-01T00:00:00+00:00");
assert_eq!(q.wheres.len(), 2);
}
// A type-only query returns the records of that type and none of another type.
#[test]
fn execute_with_type_returns_only_that_types_folder() {
let contacts = [
rec("records/contacts/sarah.md", "contact", &[]),
rec("records/contacts/mara.md", "contact", &[]),
];
let companies = [rec("records/companies/acme.md", "company", &[])];
let (_dir, store) = store_with_sidecars(&[
("records/contacts", &contacts),
("records/companies", &companies),
]);
let got = Query::new().with_type("contact").execute(&store).unwrap();
assert_eq!(
paths(&got),
path_set(&["records/contacts/sarah.md", "records/contacts/mara.md"]),
"a type query reads its own type-folder sidecar and excludes other types"
);
}
// Type plus one where clause: only the record whose custom field equals the value
// survives; a record lacking the field is excluded.
#[test]
fn execute_type_plus_where_intersects_on_a_custom_field() {
let contacts = [
rec(
"records/contacts/sarah.md",
"contact",
&[("company", Value::String("acme".into()))],
),
rec(
"records/contacts/mara.md",
"contact",
&[("company", Value::String("globex".into()))],
),
rec("records/contacts/no-company.md", "contact", &[]),
];
let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
let got = Query::new()
.with_type("contact")
.with_where("company", "acme")
.execute(&store)
.unwrap();
assert_eq!(
paths(&got),
path_set(&["records/contacts/sarah.md"]),
"the where clause narrows the type's records to the matching field; \
a record missing the key does not match"
);
}
// Several where clauses are ANDed: only the record matching both is returned.
#[test]
fn execute_multiple_where_clauses_and_together() {
let contacts = [
rec(
"records/contacts/a.md",
"contact",
&[
("company", Value::String("acme".into())),
("status", Value::String("active".into())),
],
),
rec(
"records/contacts/b.md",
"contact",
&[
("company", Value::String("acme".into())),
("status", Value::String("churned".into())),
],
),
rec(
"records/contacts/c.md",
"contact",
&[
("company", Value::String("globex".into())),
("status", Value::String("active".into())),
],
),
];
let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
let got = Query::new()
.with_type("contact")
.with_where("company", "acme")
.with_where("status", "active")
.execute(&store)
.unwrap();
assert_eq!(paths(&got), path_set(&["records/contacts/a.md"]));
}
// Without a type, a where clause matches records from more than one type folder.
#[test]
fn execute_where_without_type_reads_across_sidecars() {
let contacts = [rec(
"records/contacts/sarah.md",
"contact",
&[("domain", Value::String("acme.com".into()))],
)];
let companies = [
rec(
"records/companies/acme.md",
"company",
&[("domain", Value::String("acme.com".into()))],
),
rec(
"records/companies/globex.md",
"company",
&[("domain", Value::String("globex.com".into()))],
),
];
let (_dir, store) = store_with_sidecars(&[
("records/contacts", &contacts),
("records/companies", &companies),
]);
let got = Query::new()
.with_where("domain", "acme.com")
.execute(&store)
.unwrap();
assert_eq!(
paths(&got),
path_set(&["records/contacts/sarah.md", "records/companies/acme.md"]),
"a where-only query matches the field across every type-folder sidecar"
);
}
// The same where clause returns both layers unscoped, and only the `sources/`
// record once `with_layer(Layer::Sources)` is added.
#[test]
fn execute_with_layer_scopes_by_path() {
let source_recs = [rec(
"sources/notes/n1.md",
"note",
&[("topic", Value::String("billing".into()))],
)];
let record_recs = [rec(
"records/notes/n2.md",
"note",
&[("topic", Value::String("billing".into()))],
)];
let (_dir, store) = store_with_sidecars(&[
("sources/notes", &source_recs),
("records/notes", &record_recs),
]);
let unscoped = Query::new()
.with_where("topic", "billing")
.execute(&store)
.unwrap();
assert_eq!(
paths(&unscoped),
path_set(&["sources/notes/n1.md", "records/notes/n2.md"]),
);
let scoped = Query::new()
.with_where("topic", "billing")
.with_layer(Layer::Sources)
.execute(&store)
.unwrap();
assert_eq!(
paths(&scoped),
path_set(&["sources/notes/n1.md"]),
"with_layer(Sources) drops the records/-layer record"
);
}
// Plants a deliberately corrupt sidecar under `sources/`. A query scoped to the
// records layer must still succeed, while the unscoped query is expected to fail.
#[test]
fn execute_where_only_with_layer_confines_sidecar_io_not_just_result() {
let dir = TempDir::new().unwrap();
let root = dir.path();
fs::write(root.join("DB.md"), DB_MD).unwrap();
let records_dir = root.join("records/contacts");
fs::create_dir_all(&records_dir).unwrap();
let match_rec = rec(
"records/contacts/sarah.md",
"contact",
&[("domain", Value::String("acme.com".into()))],
);
fs::write(
records_dir.join("index.jsonl"),
format!("{}\n", jsonl_line(&match_rec)),
)
.unwrap();
let sources_dir = root.join("sources/emails");
fs::create_dir_all(&sources_dir).unwrap();
// Invalid JSON on purpose: reading this sidecar should surface an error.
fs::write(sources_dir.join("index.jsonl"), "{ not valid json }\n").unwrap();
let store = Store::open(root).unwrap();
let scoped = Query::new()
.with_where("domain", "acme.com")
.with_layer(Layer::Records)
.execute(&store)
.expect("a records-scoped where query must not read the sources sidecar");
assert_eq!(paths(&scoped), path_set(&["records/contacts/sarah.md"]));
let unscoped = Query::new()
.with_where("domain", "acme.com")
.execute(&store);
assert!(
unscoped.is_err(),
"an unscoped where query reads every sidecar, including the corrupt one"
);
}
// Type + layer + where together: the matching record is returned for the right
// layer, and nothing is returned when the layer does not contain the records.
#[test]
fn execute_full_composition_type_layer_where() {
let contacts = [
rec(
"records/contacts/match.md",
"contact",
&[("city", Value::String("denver".into()))],
),
rec(
"records/contacts/wrong-city.md",
"contact",
&[("city", Value::String("austin".into()))],
),
];
let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
let got = Query::new()
.with_type("contact")
.with_layer(Layer::Records)
.with_where("city", "denver")
.execute(&store)
.unwrap();
assert_eq!(paths(&got), path_set(&["records/contacts/match.md"]));
let wrong_layer = Query::new()
.with_type("contact")
.with_layer(Layer::Wiki)
.with_where("city", "denver")
.execute(&store)
.unwrap();
assert!(wrong_layer.is_empty());
}
// A query with no type and no where clause yields nothing, with or without a layer.
#[test]
fn execute_empty_query_selects_no_sidecar() {
let contacts = [rec("records/contacts/sarah.md", "contact", &[])];
let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
let got = Query::new().execute(&store).unwrap();
assert!(
got.is_empty(),
"an unconstrained query resolves to empty, not to every record"
);
let layer_only = Query::new()
.with_layer(Layer::Records)
.execute(&store)
.unwrap();
assert!(layer_only.is_empty());
}
// A where clause on `tags` matches by membership in the record's tag list.
#[test]
fn execute_tag_membership_via_where() {
let mut urgent = rec("records/tasks/t1.md", "task", &[]);
urgent.tags = vec!["urgent".into(), "ops".into()];
let mut calm = rec("records/tasks/t2.md", "task", &[]);
calm.tags = vec!["ops".into()];
let recs = [urgent, calm];
let (_dir, store) = store_with_sidecars(&[("records/tasks", &recs)]);
let got = Query::new()
.with_type("task")
.with_where("tags", "urgent")
.execute(&store)
.unwrap();
assert_eq!(
paths(&got),
path_set(&["records/tasks/t1.md"]),
"tags match on membership: only the record carrying the tag matches"
);
}
// JSON number and bool field values match the string form given in the predicate.
#[test]
fn execute_matches_numeric_and_bool_fields_from_string_predicate() {
let recs = [
rec(
"records/invoices/paid.md",
"invoice",
&[
("amount", Value::Number(42.into())),
("paid", Value::Bool(true)),
],
),
rec(
"records/invoices/unpaid.md",
"invoice",
&[
("amount", Value::Number(99.into())),
("paid", Value::Bool(false)),
],
),
];
let (_dir, store) = store_with_sidecars(&[("records/invoices", &recs)]);
let by_amount = Query::new()
.with_type("invoice")
.with_where("amount", "42")
.execute(&store)
.unwrap();
assert_eq!(
paths(&by_amount),
path_set(&["records/invoices/paid.md"]),
"a JSON number matches the string form of the predicate"
);
let by_paid = Query::new()
.with_type("invoice")
.with_where("paid", "true")
.execute(&store)
.unwrap();
assert_eq!(
paths(&by_paid),
path_set(&["records/invoices/paid.md"]),
"a JSON bool matches \"true\"/\"false\""
);
}
// Writes two sidecar lines for the same path; the query must see only the later
// line's field value.
#[test]
fn execute_honors_last_write_wins_in_sidecar() {
let dir = TempDir::new().unwrap();
let root = dir.path();
fs::write(root.join("DB.md"), DB_MD).unwrap();
let folder = root.join("records/contacts");
fs::create_dir_all(&folder).unwrap();
let old = rec(
"records/contacts/sarah.md",
"contact",
&[("status", Value::String("lead".into()))],
);
let new = rec(
"records/contacts/sarah.md",
"contact",
&[("status", Value::String("customer".into()))],
);
fs::write(
folder.join("index.jsonl"),
format!("{}\n{}\n", jsonl_line(&old), jsonl_line(&new)),
)
.unwrap();
let store = Store::open(root).unwrap();
let superseding = Query::new()
.with_type("contact")
.with_where("status", "customer")
.execute(&store)
.unwrap();
assert_eq!(superseding.len(), 1, "the superseding line's value matches");
let superseded = Query::new()
.with_type("contact")
.with_where("status", "lead")
.execute(&store)
.unwrap();
assert!(
superseded.is_empty(),
"the superseded line's value no longer matches after last-write-wins"
);
}
// Results carry the whole record (summary, tags, links, custom fields), not just the path.
#[test]
fn execute_returns_full_records_not_just_paths() {
let mut r = rec(
"records/contacts/sarah.md",
"contact",
&[("company", Value::String("acme".into()))],
);
r.summary = "Renewal champion".into();
r.tags = vec!["vip".into()];
r.links = vec!["wiki/people/sarah-chen.md".into()];
let recs = [r];
let (_dir, store) = store_with_sidecars(&[("records/contacts", &recs)]);
let got = Query::new().with_type("contact").execute(&store).unwrap();
assert_eq!(got.len(), 1);
let only = &got[0];
assert_eq!(only.summary, "Renewal champion");
assert_eq!(only.tags, vec!["vip".to_string()]);
assert_eq!(only.links, vec!["wiki/people/sarah-chen.md".to_string()]);
assert_eq!(
only.fields.get("company"),
Some(&Value::String("acme".into())),
"type-specific fields come back verbatim for on-demand use"
);
}
// The built-in `type`, `summary` and `path` keys compare against the typed columns.
#[test]
fn record_matches_where_on_typed_columns() {
let mut r = rec("records/contacts/x.md", "contact", &[]);
r.summary = "hello".into();
assert!(record_matches_where(&r, "type", "contact"));
assert!(!record_matches_where(&r, "type", "company"));
assert!(record_matches_where(&r, "summary", "hello"));
assert!(!record_matches_where(&r, "summary", "goodbye"));
assert!(record_matches_where(&r, "path", "records/contacts/x.md"));
assert!(!record_matches_where(&r, "path", "records/contacts/y.md"));
}
// Timestamp keys compare against the RFC 3339 string; an unset `updated` never matches.
#[test]
fn record_matches_where_on_timestamps_uses_rfc3339() {
let mut r = rec("records/meetings/m.md", "meeting", &[]);
let ts = chrono::DateTime::parse_from_rfc3339("2026-05-29T12:00:00+00:00").unwrap();
r.created = Some(ts);
assert!(record_matches_where(
&r,
"created",
"2026-05-29T12:00:00+00:00"
));
assert!(!record_matches_where(
&r,
"created",
"2026-05-29T13:00:00+00:00"
));
assert!(!record_matches_where(
&r,
"updated",
"2026-05-29T12:00:00+00:00"
));
}
// A key that is neither a built-in column nor present in `fields` does not match.
#[test]
fn record_matches_where_absent_field_is_false() {
let r = rec("records/contacts/x.md", "contact", &[]);
assert!(
!record_matches_where(&r, "nonexistent", "anything"),
"an absent frontmatter key never matches"
);
}
// Strings, numbers and bools match on their textual form; arrays match on membership.
#[test]
fn json_value_matches_covers_scalars_and_arrays() {
assert!(json_value_matches(&Value::String("acme".into()), "acme"));
assert!(!json_value_matches(&Value::String("acme".into()), "globex"));
assert!(json_value_matches(&Value::Number(42.into()), "42"));
assert!(!json_value_matches(&Value::Number(42.into()), "43"));
assert!(json_value_matches(&Value::Bool(true), "true"));
assert!(json_value_matches(&Value::Bool(false), "false"));
assert!(!json_value_matches(&Value::Bool(true), "false"));
let arr = Value::Array(vec![Value::String("a".into()), Value::String("b".into())]);
assert!(json_value_matches(&arr, "b"), "array matches on membership");
assert!(!json_value_matches(&arr, "c"));
}
// `null` matches neither "" nor "null", and an object does not match its inner values.
#[test]
fn json_value_matches_null_and_object_never_match() {
assert!(!json_value_matches(&Value::Null, ""));
assert!(!json_value_matches(&Value::Null, "null"));
let obj = serde_json::json!({"k": "v"});
assert!(!json_value_matches(&obj, "v"));
}
// Layer membership is decided by the first path component for all three layers.
#[test]
fn record_in_layer_keys_off_first_path_component() {
let s = rec("sources/emails/e.md", "email", &[]);
let r = rec("records/contacts/c.md", "contact", &[]);
let w = rec("wiki/people/p.md", "wiki-page", &[]);
assert!(record_in_layer(&s, Layer::Sources));
assert!(!record_in_layer(&s, Layer::Records));
assert!(record_in_layer(&r, Layer::Records));
assert!(!record_in_layer(&r, Layer::Wiki));
assert!(record_in_layer(&w, Layer::Wiki));
assert!(!record_in_layer(&w, Layer::Sources));
}
// With one clause marked as pre-applied, the remaining clause is still enforced.
#[test]
fn filter_candidates_skips_already_applied_where_clause() {
let q = Query::new()
.with_where("company", "acme")
.with_where("status", "active");
let keep = rec(
"records/contacts/keep.md",
"contact",
&[
("company", Value::String("acme".into())),
("status", Value::String("active".into())),
],
);
let drop = rec(
"records/contacts/drop.md",
"contact",
&[
("company", Value::String("acme".into())),
("status", Value::String("churned".into())),
],
);
let out = q.filter_candidates(vec![keep, drop], false, 1);
assert_eq!(
paths(&out),
path_set(&["records/contacts/keep.md"]),
"the second clause is enforced even when the first is pre-applied"
);
}
// When the type was not applied upstream, `filter_candidates` filters on it itself.
#[test]
fn filter_candidates_enforces_type_when_not_preapplied() {
let q = Query::new().with_type("contact");
let contact = rec("records/contacts/c.md", "contact", &[]);
let company = rec("records/companies/co.md", "company", &[]);
let out = q.filter_candidates(vec![contact, company], false, 0);
assert_eq!(paths(&out), path_set(&["records/contacts/c.md"]));
}
// Sanity check on the fixture layout: `Store::find_by_type("contact")` finds the
// record written under `records/contacts`.
#[test]
fn fixture_canonical_folders_match_store_expectations() {
let contacts = [rec("records/contacts/x.md", "contact", &[])];
let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
let got = store.find_by_type("contact").unwrap();
assert_eq!(got.len(), 1, "fixture folder == store's canonical folder");
}
}