Skip to main content

dbmd_core/
query.rs

1//! `query` — Dataview-style filters, **sidecar-backed**.
2//!
3//! Resolves against the type-folder `index.jsonl` sidecar(s) via
4//! [`Store::find_by_type`] / [`Store::find_by_where`] /
5//! [`Store::read_type_index`] — one sequential, complete read per type-folder,
6//! cold-cache-proof — **never** a walk-and-parse. Returns full
7//! [`IndexRecord`]s straight from the sidecar (path + fields + summary +
8//! links); the caller opens the underlying file only if it needs the body.
9//!
10//! Backs `dbmd search --type/--where`, `dbmd fm query`, `dbmd index query`, and
11//! `dbmd graph backlinks --type/--in`.
12
13use serde_json::Value;
14
15use crate::index::IndexRecord;
16use crate::store::{Layer, Store, StoreError};
17
/// A composable, sidecar-backed filter over a store's records.
///
/// Build with [`Query::new`] and the `with_*` methods, then run it with
/// [`Query::execute`]. Every predicate narrows the result: the optional
/// `type`, the optional layer scope, and all [`Query::with_where`] clauses AND
/// together (intersection over the sidecar records).
///
/// The derived [`Default`] is the empty query — no `type`, no layer, no
/// `where` clauses — which is also what [`Query::new`] returns.
#[derive(Debug, Clone, Default)]
pub struct Query {
    /// `type` predicate (`with_type`); `None` means "not constrained by type".
    type_: Option<String>,
    /// Layer scope (`with_layer` / `--in <layer>`); `None` means any layer.
    layer: Option<Layer>,
    /// `key=value` frontmatter predicates in insertion order, ANDed. When no
    /// `type` is set, the first entry is the one `execute` hands to the
    /// sidecar reader to pick the candidate set.
    wheres: Vec<(String, String)>,
}
32
33impl Query {
34    /// Start a new, empty query (matches everything until narrowed).
35    pub fn new() -> Self {
36        Self::default()
37    }
38
39    /// Restrict to a single `type` (frontmatter `type` predicate).
40    ///
41    /// Setting it again replaces the previous value — a query has at most one
42    /// `type` (a record carries exactly one `type`, so two types would never
43    /// intersect).
44    pub fn with_type(mut self, type_: &str) -> Self {
45        self.type_ = Some(type_.to_string());
46        self
47    }
48
49    /// Restrict to one layer (`Sources` / `Records` / `Wiki`) — scopes which
50    /// sidecars' records survive. Setting it again replaces the previous layer.
51    pub fn with_layer(mut self, layer: Layer) -> Self {
52        self.layer = Some(layer);
53        self
54    }
55
56    /// Add a `key=value` frontmatter predicate; chains as AND with any others
57    /// (intersection over the sidecar records). Repeating the same `key` adds a
58    /// second clause — both must hold — rather than replacing the first.
59    pub fn with_where(mut self, key: &str, value: &str) -> Self {
60        self.wheres.push((key.to_string(), value.to_string()));
61        self
62    }
63
64    /// Resolve the query against the relevant type-folder `index.jsonl`
65    /// sidecar(s) and return the matching [`IndexRecord`]s — complete, one
66    /// sequential read per type-folder, no whole-store walk.
67    ///
68    /// The candidate set comes from the most selective frozen sidecar reader:
69    /// [`Store::find_by_type`] when a `type` is set (one type-folder's
70    /// sidecars), otherwise [`Store::find_by_where_in`] on the first `where`
71    /// clause — and that reader is **layer-scoped** when [`with_layer`] is set,
72    /// so a `--where`-only query reads only the named layer's sidecars instead
73    /// of the whole store (O(entities-in-layer), the interactive-loop contract).
74    /// The layer scope and every remaining predicate are then applied in memory
75    /// over the returned records — no extra sidecar reads, no walk.
76    ///
77    /// [`with_layer`]: Query::with_layer
78    ///
79    /// A query that constrains neither `type` nor any `where` clause selects no
80    /// sidecar (a bare or layer-only query has no walk-free candidate set under
81    /// the sidecar API) and returns an empty result; the CLI always supplies a
82    /// `--type` or a `--where`.
83    pub fn execute(&self, store: &Store) -> Result<Vec<IndexRecord>, StoreError> {
84        // Pick the candidate set from the cheapest frozen sidecar reader, and
85        // remember which predicates that reader has already satisfied so the
86        // in-memory pass doesn't re-test them.
87        let (candidates, type_done, where_done) = if let Some(type_) = &self.type_ {
88            // `find_by_type` reads the type's canonical sidecar (or, when that
89            // folder isn't indexed yet, the sidecars of just that type's layer —
90            // never the whole store); every record it returns already has the
91            // right `type`.
92            (store.find_by_type(type_)?, true, 0)
93        } else if let Some((key, value)) = self.wheres.first() {
94            // No type to scope on: let the first `where` clause pick the
95            // sidecars and pre-filter. `self.layer` (when set) confines the
96            // sidecar walk to that layer's subtree, so a `--where`-only query
97            // is O(entities-in-layer), not O(store records) — the in-memory
98            // layer filter below then becomes a no-op for this path. The
99            // remaining clauses AND in memory.
100            (store.find_by_where_in(key, value, self.layer)?, false, 1)
101        } else {
102            // Nothing selects a sidecar: no walk-free candidate set exists.
103            return Ok(Vec::new());
104        };
105
106        Ok(self.filter_candidates(candidates, type_done, where_done))
107    }
108
109    /// Apply the in-memory predicate pass over a candidate set returned by a
110    /// sidecar reader: the `type` predicate (unless `type_already_applied`,
111    /// because [`Store::find_by_type`] guarantees it), the [`with_layer`] scope,
112    /// and every remaining `where` clause (skipping the first
113    /// `wheres_already_applied`, which [`Store::find_by_where`] pre-filtered).
114    /// All surviving predicates AND together.
115    ///
116    /// Split out from [`Query::execute`] so the composition is exercisable over
117    /// hand-built [`IndexRecord`]s independent of the sidecar I/O.
118    ///
119    /// [`with_layer`]: Query::with_layer
120    fn filter_candidates(
121        &self,
122        candidates: Vec<IndexRecord>,
123        type_already_applied: bool,
124        wheres_already_applied: usize,
125    ) -> Vec<IndexRecord> {
126        candidates
127            .into_iter()
128            .filter(|record| {
129                if !type_already_applied {
130                    if let Some(type_) = &self.type_ {
131                        if record.type_ != *type_ {
132                            return false;
133                        }
134                    }
135                }
136                if let Some(layer) = self.layer {
137                    if !record_in_layer(record, layer) {
138                        return false;
139                    }
140                }
141                self.wheres
142                    .iter()
143                    .skip(wheres_already_applied)
144                    .all(|(key, value)| record_matches_where(record, key, value))
145            })
146            .collect()
147    }
148}
149
150/// True if `record`'s store-relative `path` lives under `layer`'s top-level
151/// folder (`sources/` / `records/` / `wiki/`). The sidecar readers can return
152/// records from any layer (a `type` folder name is not unique across layers),
153/// so a `with_layer` scope is enforced here on the record's path.
154fn record_in_layer(record: &IndexRecord, layer: Layer) -> bool {
155    record
156        .path
157        .components()
158        .next()
159        .and_then(|c| c.as_os_str().to_str())
160        == Some(layer_dir_name(layer))
161}
162
163/// The top-level folder name for a [`Layer`] (`"sources"` / `"records"` /
164/// `"wiki"`). Kept local so the layer-scope filter is self-contained and does
165/// not couple `query` to the store-walk module's dir-name helpers.
166fn layer_dir_name(layer: Layer) -> &'static str {
167    match layer {
168        Layer::Sources => "sources",
169        Layer::Records => "records",
170        Layer::Wiki => "wiki",
171    }
172}
173
174/// True if `record` satisfies a single `key=value` frontmatter predicate.
175///
176/// The universal-contract keys map to their typed [`IndexRecord`] columns
177/// (`type`, `summary`, `created`, `updated`, plus the list-valued `tags` /
178/// `links` which match when `value` is one of the members); every other key is
179/// looked up in [`IndexRecord::fields`] and compared with
180/// [`json_value_matches`]. An absent key never matches.
181fn record_matches_where(record: &IndexRecord, key: &str, value: &str) -> bool {
182    match key {
183        "type" => record.type_ == value,
184        "summary" => record.summary == value,
185        "path" => record.path.to_str() == Some(value),
186        // List-valued columns match on membership: `tags=urgent` is true when
187        // `urgent` is one of the file's tags.
188        "tags" => record.tags.iter().any(|t| t == value),
189        "links" => record.links.iter().any(|l| l == value),
190        // Timestamps compare on their canonical RFC3339 string form so a query
191        // can pin an exact `created` / `updated`.
192        "created" => record.created.map(|t| t.to_rfc3339()).as_deref() == Some(value),
193        "updated" => record.updated.map(|t| t.to_rfc3339()).as_deref() == Some(value),
194        _ => record
195            .fields
196            .get(key)
197            .is_some_and(|v| json_value_matches(v, value)),
198    }
199}
200
201/// Compare a sidecar [`Value`] against the string `value` from a `key=value`
202/// predicate. The CLI surface is all strings, so matching is defined against
203/// the value's natural string form:
204///
205/// - a string matches when equal;
206/// - a number matches when its canonical render equals `value` (so `42` matches
207///   `"42"`, and `12.5` matches `"12.5"`);
208/// - a bool matches `"true"` / `"false"`;
209/// - an array matches when **any** element matches (so a list-valued custom
210///   field behaves like `tags` — membership, not whole-list equality);
211/// - `null` never matches.
212fn json_value_matches(value: &Value, target: &str) -> bool {
213    match value {
214        Value::String(s) => s == target,
215        Value::Number(n) => n.to_string() == target,
216        Value::Bool(b) => b.to_string() == target,
217        Value::Array(items) => items.iter().any(|item| json_value_matches(item, target)),
218        Value::Null => false,
219        // Objects have no scalar form a `key=value` predicate can match.
220        Value::Object(_) => false,
221    }
222}
223
224#[cfg(test)]
225mod tests {
226    use super::*;
227    use crate::store::Store;
228    use std::fs;
229    use std::path::PathBuf;
230    use tempfile::TempDir;
231
232    // ── Fixtures ─────────────────────────────────────────────────────────────
233
234    /// Build an [`IndexRecord`] with the given store-relative path, type, and
235    /// extra (`fields`) frontmatter, leaving the timestamp/list columns empty.
236    /// Tests that need `tags`/`links`/`created` set them on the returned value.
237    fn rec(path: &str, type_: &str, fields: &[(&str, Value)]) -> IndexRecord {
238        IndexRecord {
239            path: PathBuf::from(path),
240            type_: type_.to_string(),
241            summary: format!("summary of {path}"),
242            tags: Vec::new(),
243            links: Vec::new(),
244            created: None,
245            updated: None,
246            fields: fields
247                .iter()
248                .map(|(k, v)| (k.to_string(), v.clone()))
249                .collect(),
250        }
251    }
252
253    /// Serialize one record to a single JSONL line (what a real sidecar holds).
254    fn jsonl_line(record: &IndexRecord) -> String {
255        serde_json::to_string(record).expect("serialize IndexRecord")
256    }
257
    /// Contents of the `DB.md` marker file written at the root of every test
    /// store: a `---`-delimited frontmatter block declaring `type: db-md`,
    /// followed by a heading. Per the original note, `parse_db_md` requires
    /// the frontmatter block and the store config ends up as the default —
    /// NOTE(review): that parser is not visible here; confirm against it.
    const DB_MD: &str = "---\ntype: db-md\n---\n\n# Test store\n";
261
262    /// Write a temp store: a `DB.md` marker plus an `index.jsonl` sidecar at
263    /// each `(store-relative folder, records)` entry. Returns the temp dir
264    /// (kept alive by the caller) and the opened [`Store`].
265    fn store_with_sidecars(sidecars: &[(&str, &[IndexRecord])]) -> (TempDir, Store) {
266        let dir = TempDir::new().expect("temp dir");
267        let root = dir.path();
268        fs::write(root.join("DB.md"), DB_MD).expect("write DB.md");
269
270        for (folder, records) in sidecars {
271            let folder_abs = root.join(folder);
272            fs::create_dir_all(&folder_abs).expect("create type folder");
273            let body: String = records
274                .iter()
275                .map(|r| format!("{}\n", jsonl_line(r)))
276                .collect();
277            fs::write(folder_abs.join("index.jsonl"), body).expect("write index.jsonl");
278        }
279
280        let store = Store::open(root).expect("open store");
281        (dir, store)
282    }
283
284    /// The set of store-relative path strings in a result set, for order-
285    /// independent assertions.
286    fn paths(records: &[IndexRecord]) -> std::collections::BTreeSet<String> {
287        records
288            .iter()
289            .map(|r| r.path.to_string_lossy().into_owned())
290            .collect()
291    }
292
293    fn path_set(items: &[&str]) -> std::collections::BTreeSet<String> {
294        items.iter().map(|s| s.to_string()).collect()
295    }
296
297    // ── Builder state ────────────────────────────────────────────────────────
298
299    #[test]
300    fn builder_accumulates_predicates() {
301        let q = Query::new()
302            .with_type("contact")
303            .with_layer(Layer::Records)
304            .with_where("company", "acme")
305            .with_where("status", "active");
306
307        assert_eq!(q.type_.as_deref(), Some("contact"));
308        assert_eq!(q.layer, Some(Layer::Records));
309        assert_eq!(
310            q.wheres,
311            vec![
312                ("company".to_string(), "acme".to_string()),
313                ("status".to_string(), "active".to_string()),
314            ],
315            "each with_where appends a distinct clause"
316        );
317    }
318
319    #[test]
320    fn with_type_and_with_layer_replace_rather_than_stack() {
321        let q = Query::new()
322            .with_type("contact")
323            .with_type("company")
324            .with_layer(Layer::Sources)
325            .with_layer(Layer::Wiki);
326        assert_eq!(q.type_.as_deref(), Some("company"));
327        assert_eq!(q.layer, Some(Layer::Wiki));
328    }
329
330    #[test]
331    fn repeated_with_where_same_key_keeps_both_clauses() {
332        // Two clauses on the same key must both be retained (range-style AND),
333        // not collapsed to the last one.
334        let q = Query::new()
335            .with_where("updated", "2026-01-01T00:00:00+00:00")
336            .with_where("updated", "2026-02-01T00:00:00+00:00");
337        assert_eq!(q.wheres.len(), 2);
338    }
339
340    // ── execute: real sidecars on disk ───────────────────────────────────────
341
342    #[test]
343    fn execute_with_type_returns_only_that_types_folder() {
344        let contacts = [
345            rec("records/contacts/sarah.md", "contact", &[]),
346            rec("records/contacts/mara.md", "contact", &[]),
347        ];
348        let companies = [rec("records/companies/acme.md", "company", &[])];
349        let (_dir, store) = store_with_sidecars(&[
350            ("records/contacts", &contacts),
351            ("records/companies", &companies),
352        ]);
353
354        let got = Query::new().with_type("contact").execute(&store).unwrap();
355
356        assert_eq!(
357            paths(&got),
358            path_set(&["records/contacts/sarah.md", "records/contacts/mara.md"]),
359            "a type query reads its own type-folder sidecar and excludes other types"
360        );
361    }
362
363    #[test]
364    fn execute_type_plus_where_intersects_on_a_custom_field() {
365        let contacts = [
366            rec(
367                "records/contacts/sarah.md",
368                "contact",
369                &[("company", Value::String("acme".into()))],
370            ),
371            rec(
372                "records/contacts/mara.md",
373                "contact",
374                &[("company", Value::String("globex".into()))],
375            ),
376            rec("records/contacts/no-company.md", "contact", &[]),
377        ];
378        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
379
380        let got = Query::new()
381            .with_type("contact")
382            .with_where("company", "acme")
383            .execute(&store)
384            .unwrap();
385
386        assert_eq!(
387            paths(&got),
388            path_set(&["records/contacts/sarah.md"]),
389            "the where clause narrows the type's records to the matching field; \
390             a record missing the key does not match"
391        );
392    }
393
394    #[test]
395    fn execute_multiple_where_clauses_and_together() {
396        let contacts = [
397            rec(
398                "records/contacts/a.md",
399                "contact",
400                &[
401                    ("company", Value::String("acme".into())),
402                    ("status", Value::String("active".into())),
403                ],
404            ),
405            rec(
406                "records/contacts/b.md",
407                "contact",
408                &[
409                    ("company", Value::String("acme".into())),
410                    ("status", Value::String("churned".into())),
411                ],
412            ),
413            rec(
414                "records/contacts/c.md",
415                "contact",
416                &[
417                    ("company", Value::String("globex".into())),
418                    ("status", Value::String("active".into())),
419                ],
420            ),
421        ];
422        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
423
424        let got = Query::new()
425            .with_type("contact")
426            .with_where("company", "acme")
427            .with_where("status", "active")
428            .execute(&store)
429            .unwrap();
430
431        // Only `a` satisfies BOTH clauses. If the clauses were OR'd, `b` and `c`
432        // would leak in.
433        assert_eq!(paths(&got), path_set(&["records/contacts/a.md"]));
434    }
435
436    #[test]
437    fn execute_where_without_type_reads_across_sidecars() {
438        // `find_by_where` scans every sidecar; the same `domain` value lives in
439        // both a contact and a company record, and both come back.
440        let contacts = [rec(
441            "records/contacts/sarah.md",
442            "contact",
443            &[("domain", Value::String("acme.com".into()))],
444        )];
445        let companies = [
446            rec(
447                "records/companies/acme.md",
448                "company",
449                &[("domain", Value::String("acme.com".into()))],
450            ),
451            rec(
452                "records/companies/globex.md",
453                "company",
454                &[("domain", Value::String("globex.com".into()))],
455            ),
456        ];
457        let (_dir, store) = store_with_sidecars(&[
458            ("records/contacts", &contacts),
459            ("records/companies", &companies),
460        ]);
461
462        let got = Query::new()
463            .with_where("domain", "acme.com")
464            .execute(&store)
465            .unwrap();
466
467        assert_eq!(
468            paths(&got),
469            path_set(&["records/contacts/sarah.md", "records/companies/acme.md"]),
470            "a where-only query matches the field across every type-folder sidecar"
471        );
472    }
473
474    #[test]
475    fn execute_with_layer_scopes_by_path() {
476        // Same custom field value present in two layers; the layer scope must
477        // keep only the records under the named layer folder.
478        let source_recs = [rec(
479            "sources/notes/n1.md",
480            "note",
481            &[("topic", Value::String("billing".into()))],
482        )];
483        let record_recs = [rec(
484            "records/notes/n2.md",
485            "note",
486            &[("topic", Value::String("billing".into()))],
487        )];
488        let (_dir, store) = store_with_sidecars(&[
489            ("sources/notes", &source_recs),
490            ("records/notes", &record_recs),
491        ]);
492
493        // Without a layer scope, both layers' records match.
494        let unscoped = Query::new()
495            .with_where("topic", "billing")
496            .execute(&store)
497            .unwrap();
498        assert_eq!(
499            paths(&unscoped),
500            path_set(&["sources/notes/n1.md", "records/notes/n2.md"]),
501        );
502
503        // Scoped to Sources, only the sources-layer record survives.
504        let scoped = Query::new()
505            .with_where("topic", "billing")
506            .with_layer(Layer::Sources)
507            .execute(&store)
508            .unwrap();
509        assert_eq!(
510            paths(&scoped),
511            path_set(&["sources/notes/n1.md"]),
512            "with_layer(Sources) drops the records/-layer record"
513        );
514    }
515
516    #[test]
517    fn execute_where_only_with_layer_confines_sidecar_io_not_just_result() {
518        // The O(entities-in-layer) contract for a `--where`-only query (no
519        // `--type`): `--in <layer>` must scope the *sidecar read*, not merely
520        // filter the result after a whole-store read. Proven structurally — a
521        // corrupt sidecar in another layer would make the read error if it were
522        // touched, so a layer-scoped query that SUCCEEDS is proof the
523        // out-of-scope layer's I/O never happened.
524        let dir = TempDir::new().unwrap();
525        let root = dir.path();
526        fs::write(root.join("DB.md"), DB_MD).unwrap();
527
528        // In-scope layer: a valid sidecar with the matching record.
529        let records_dir = root.join("records/contacts");
530        fs::create_dir_all(&records_dir).unwrap();
531        let match_rec = rec(
532            "records/contacts/sarah.md",
533            "contact",
534            &[("domain", Value::String("acme.com".into()))],
535        );
536        fs::write(
537            records_dir.join("index.jsonl"),
538            format!("{}\n", jsonl_line(&match_rec)),
539        )
540        .unwrap();
541
542        // Out-of-scope layer: a CORRUPT sidecar. If a `--in records` query read
543        // it, `read_type_index` would error.
544        let sources_dir = root.join("sources/emails");
545        fs::create_dir_all(&sources_dir).unwrap();
546        fs::write(sources_dir.join("index.jsonl"), "{ not valid json }\n").unwrap();
547
548        let store = Store::open(root).unwrap();
549
550        // Scoped to records: succeeds and returns only the records-layer match,
551        // because the corrupt sources sidecar was never walked.
552        let scoped = Query::new()
553            .with_where("domain", "acme.com")
554            .with_layer(Layer::Records)
555            .execute(&store)
556            .expect("a records-scoped where query must not read the sources sidecar");
557        assert_eq!(paths(&scoped), path_set(&["records/contacts/sarah.md"]));
558
559        // Unscoped: the same query DOES walk every layer and trips over the
560        // corrupt sidecar — proving the corrupt file is real and that only the
561        // layer scope spared the scoped read from reading it.
562        let unscoped = Query::new()
563            .with_where("domain", "acme.com")
564            .execute(&store);
565        assert!(
566            unscoped.is_err(),
567            "an unscoped where query reads every sidecar, including the corrupt one"
568        );
569    }
570
571    #[test]
572    fn execute_full_composition_type_layer_where() {
573        let contacts = [
574            rec(
575                "records/contacts/match.md",
576                "contact",
577                &[("city", Value::String("denver".into()))],
578            ),
579            rec(
580                "records/contacts/wrong-city.md",
581                "contact",
582                &[("city", Value::String("austin".into()))],
583            ),
584        ];
585        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
586
587        let got = Query::new()
588            .with_type("contact")
589            .with_layer(Layer::Records)
590            .with_where("city", "denver")
591            .execute(&store)
592            .unwrap();
593        assert_eq!(paths(&got), path_set(&["records/contacts/match.md"]));
594
595        // The same query scoped to the wrong layer yields nothing, proving the
596        // layer predicate is live in the composed path.
597        let wrong_layer = Query::new()
598            .with_type("contact")
599            .with_layer(Layer::Wiki)
600            .with_where("city", "denver")
601            .execute(&store)
602            .unwrap();
603        assert!(wrong_layer.is_empty());
604    }
605
606    #[test]
607    fn execute_empty_query_selects_no_sidecar() {
608        // A query with neither a type nor a where clause has no walk-free
609        // candidate set and must return empty WITHOUT touching the store walk.
610        let contacts = [rec("records/contacts/sarah.md", "contact", &[])];
611        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
612
613        let got = Query::new().execute(&store).unwrap();
614        assert!(
615            got.is_empty(),
616            "an unconstrained query resolves to empty, not to every record"
617        );
618
619        // A layer-only query likewise selects no sidecar (no type/where to pick
620        // one), so it is empty too — even though records exist in that layer.
621        let layer_only = Query::new()
622            .with_layer(Layer::Records)
623            .execute(&store)
624            .unwrap();
625        assert!(layer_only.is_empty());
626    }
627
628    #[test]
629    fn execute_tag_membership_via_where() {
630        let mut urgent = rec("records/tasks/t1.md", "task", &[]);
631        urgent.tags = vec!["urgent".into(), "ops".into()];
632        let mut calm = rec("records/tasks/t2.md", "task", &[]);
633        calm.tags = vec!["ops".into()];
634        let recs = [urgent, calm];
635        let (_dir, store) = store_with_sidecars(&[("records/tasks", &recs)]);
636
637        let got = Query::new()
638            .with_type("task")
639            .with_where("tags", "urgent")
640            .execute(&store)
641            .unwrap();
642        assert_eq!(
643            paths(&got),
644            path_set(&["records/tasks/t1.md"]),
645            "tags match on membership: only the record carrying the tag matches"
646        );
647    }
648
649    #[test]
650    fn execute_matches_numeric_and_bool_fields_from_string_predicate() {
651        let recs = [
652            rec(
653                "records/invoices/paid.md",
654                "invoice",
655                &[
656                    ("amount", Value::Number(42.into())),
657                    ("paid", Value::Bool(true)),
658                ],
659            ),
660            rec(
661                "records/invoices/unpaid.md",
662                "invoice",
663                &[
664                    ("amount", Value::Number(99.into())),
665                    ("paid", Value::Bool(false)),
666                ],
667            ),
668        ];
669        let (_dir, store) = store_with_sidecars(&[("records/invoices", &recs)]);
670
671        let by_amount = Query::new()
672            .with_type("invoice")
673            .with_where("amount", "42")
674            .execute(&store)
675            .unwrap();
676        assert_eq!(
677            paths(&by_amount),
678            path_set(&["records/invoices/paid.md"]),
679            "a JSON number matches the string form of the predicate"
680        );
681
682        let by_paid = Query::new()
683            .with_type("invoice")
684            .with_where("paid", "true")
685            .execute(&store)
686            .unwrap();
687        assert_eq!(
688            paths(&by_paid),
689            path_set(&["records/invoices/paid.md"]),
690            "a JSON bool matches \"true\"/\"false\""
691        );
692    }
693
694    #[test]
695    fn execute_honors_last_write_wins_in_sidecar() {
696        // Two JSONL lines for the same path: the later supersedes the earlier
697        // (read_type_index applies last-write-wins). A query on the superseding
698        // field must match, and one on the superseded field must not.
699        let dir = TempDir::new().unwrap();
700        let root = dir.path();
701        fs::write(root.join("DB.md"), DB_MD).unwrap();
702        let folder = root.join("records/contacts");
703        fs::create_dir_all(&folder).unwrap();
704
705        let old = rec(
706            "records/contacts/sarah.md",
707            "contact",
708            &[("status", Value::String("lead".into()))],
709        );
710        let new = rec(
711            "records/contacts/sarah.md",
712            "contact",
713            &[("status", Value::String("customer".into()))],
714        );
715        fs::write(
716            folder.join("index.jsonl"),
717            format!("{}\n{}\n", jsonl_line(&old), jsonl_line(&new)),
718        )
719        .unwrap();
720        let store = Store::open(root).unwrap();
721
722        let superseding = Query::new()
723            .with_type("contact")
724            .with_where("status", "customer")
725            .execute(&store)
726            .unwrap();
727        assert_eq!(superseding.len(), 1, "the superseding line's value matches");
728
729        let superseded = Query::new()
730            .with_type("contact")
731            .with_where("status", "lead")
732            .execute(&store)
733            .unwrap();
734        assert!(
735            superseded.is_empty(),
736            "the superseded line's value no longer matches after last-write-wins"
737        );
738    }
739
740    #[test]
741    fn execute_returns_full_records_not_just_paths() {
742        // The contract returns full IndexRecords straight from the sidecar:
743        // summary, tags, links, and fields must survive the round-trip.
744        let mut r = rec(
745            "records/contacts/sarah.md",
746            "contact",
747            &[("company", Value::String("acme".into()))],
748        );
749        r.summary = "Renewal champion".into();
750        r.tags = vec!["vip".into()];
751        r.links = vec!["wiki/people/sarah-chen.md".into()];
752        let recs = [r];
753        let (_dir, store) = store_with_sidecars(&[("records/contacts", &recs)]);
754
755        let got = Query::new().with_type("contact").execute(&store).unwrap();
756        assert_eq!(got.len(), 1);
757        let only = &got[0];
758        assert_eq!(only.summary, "Renewal champion");
759        assert_eq!(only.tags, vec!["vip".to_string()]);
760        assert_eq!(only.links, vec!["wiki/people/sarah-chen.md".to_string()]);
761        assert_eq!(
762            only.fields.get("company"),
763            Some(&Value::String("acme".into())),
764            "type-specific fields come back verbatim for on-demand use"
765        );
766    }
767
768    // ── Pure matcher logic (no store I/O) ────────────────────────────────────
769
770    #[test]
771    fn record_matches_where_on_typed_columns() {
772        let mut r = rec("records/contacts/x.md", "contact", &[]);
773        r.summary = "hello".into();
774
775        assert!(record_matches_where(&r, "type", "contact"));
776        assert!(!record_matches_where(&r, "type", "company"));
777        assert!(record_matches_where(&r, "summary", "hello"));
778        assert!(!record_matches_where(&r, "summary", "goodbye"));
779        assert!(record_matches_where(&r, "path", "records/contacts/x.md"));
780        assert!(!record_matches_where(&r, "path", "records/contacts/y.md"));
781    }
782
783    #[test]
784    fn record_matches_where_on_timestamps_uses_rfc3339() {
785        let mut r = rec("records/meetings/m.md", "meeting", &[]);
786        let ts = chrono::DateTime::parse_from_rfc3339("2026-05-29T12:00:00+00:00").unwrap();
787        r.created = Some(ts);
788
789        assert!(record_matches_where(
790            &r,
791            "created",
792            "2026-05-29T12:00:00+00:00"
793        ));
794        assert!(!record_matches_where(
795            &r,
796            "created",
797            "2026-05-29T13:00:00+00:00"
798        ));
799        // `updated` is unset → never matches, even the same instant.
800        assert!(!record_matches_where(
801            &r,
802            "updated",
803            "2026-05-29T12:00:00+00:00"
804        ));
805    }
806
807    #[test]
808    fn record_matches_where_absent_field_is_false() {
809        let r = rec("records/contacts/x.md", "contact", &[]);
810        assert!(
811            !record_matches_where(&r, "nonexistent", "anything"),
812            "an absent frontmatter key never matches"
813        );
814    }
815
816    #[test]
817    fn json_value_matches_covers_scalars_and_arrays() {
818        assert!(json_value_matches(&Value::String("acme".into()), "acme"));
819        assert!(!json_value_matches(&Value::String("acme".into()), "globex"));
820
821        assert!(json_value_matches(&Value::Number(42.into()), "42"));
822        assert!(!json_value_matches(&Value::Number(42.into()), "43"));
823
824        assert!(json_value_matches(&Value::Bool(true), "true"));
825        assert!(json_value_matches(&Value::Bool(false), "false"));
826        assert!(!json_value_matches(&Value::Bool(true), "false"));
827
828        let arr = Value::Array(vec![Value::String("a".into()), Value::String("b".into())]);
829        assert!(json_value_matches(&arr, "b"), "array matches on membership");
830        assert!(!json_value_matches(&arr, "c"));
831    }
832
833    #[test]
834    fn json_value_matches_null_and_object_never_match() {
835        assert!(!json_value_matches(&Value::Null, ""));
836        assert!(!json_value_matches(&Value::Null, "null"));
837        let obj = serde_json::json!({"k": "v"});
838        assert!(!json_value_matches(&obj, "v"));
839    }
840
841    #[test]
842    fn record_in_layer_keys_off_first_path_component() {
843        let s = rec("sources/emails/e.md", "email", &[]);
844        let r = rec("records/contacts/c.md", "contact", &[]);
845        let w = rec("wiki/people/p.md", "wiki-page", &[]);
846
847        assert!(record_in_layer(&s, Layer::Sources));
848        assert!(!record_in_layer(&s, Layer::Records));
849        assert!(record_in_layer(&r, Layer::Records));
850        assert!(!record_in_layer(&r, Layer::Wiki));
851        assert!(record_in_layer(&w, Layer::Wiki));
852        assert!(!record_in_layer(&w, Layer::Sources));
853    }
854
855    #[test]
856    fn filter_candidates_skips_already_applied_where_clause() {
857        // Simulate the find_by_where path: the first clause is "already applied"
858        // by the sidecar reader, so filter_candidates must skip it and only
859        // enforce the remaining clause. A record satisfying only the (skipped)
860        // first clause but NOT the second must still be dropped.
861        let q = Query::new()
862            .with_where("company", "acme")
863            .with_where("status", "active");
864
865        let keep = rec(
866            "records/contacts/keep.md",
867            "contact",
868            &[
869                ("company", Value::String("acme".into())),
870                ("status", Value::String("active".into())),
871            ],
872        );
873        let drop = rec(
874            "records/contacts/drop.md",
875            "contact",
876            &[
877                ("company", Value::String("acme".into())),
878                ("status", Value::String("churned".into())),
879            ],
880        );
881
882        let out = q.filter_candidates(vec![keep, drop], false, 1);
883        assert_eq!(
884            paths(&out),
885            path_set(&["records/contacts/keep.md"]),
886            "the second clause is enforced even when the first is pre-applied"
887        );
888    }
889
890    #[test]
891    fn filter_candidates_enforces_type_when_not_preapplied() {
892        // When the candidate set did NOT come from find_by_type (type_applied =
893        // false), filter_candidates must still drop records of the wrong type.
894        let q = Query::new().with_type("contact");
895        let contact = rec("records/contacts/c.md", "contact", &[]);
896        let company = rec("records/companies/co.md", "company", &[]);
897
898        let out = q.filter_candidates(vec![contact, company], false, 0);
899        assert_eq!(paths(&out), path_set(&["records/contacts/c.md"]));
900    }
901
902    /// Local guard: the test fixtures write sidecars under the same canonical
903    /// folders the store reader derives, so a `with_type` query finds them.
904    /// If this drifts, the integration tests above silently weaken — assert the
905    /// convention explicitly.
906    #[test]
907    fn fixture_canonical_folders_match_store_expectations() {
908        let contacts = [rec("records/contacts/x.md", "contact", &[])];
909        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
910        // `contact` records live at records/contacts/ — the same folder the
911        // fixture wrote — so the type read is non-empty.
912        let got = store.find_by_type("contact").unwrap();
913        assert_eq!(got.len(), 1, "fixture folder == store's canonical folder");
914    }
915}