Skip to main content

dbmd_core/
query.rs

1//! `query` — Dataview-style filters, **sidecar-backed**.
2//!
3//! Resolves against the type-folder `index.jsonl` sidecar(s) via
4//! [`Store::find_by_type`] / [`Store::find_by_where`] /
5//! [`Store::read_type_index`] — one sequential, complete read per type-folder,
6//! cold-cache-proof — **never** a walk-and-parse. Returns full
7//! [`IndexRecord`]s straight from the sidecar (path + fields + summary +
8//! links); the caller opens the underlying file only if it needs the body.
9//!
10//! Backs `dbmd search --type/--where`, `dbmd fm query`, `dbmd index query`, and
11//! `dbmd graph backlinks --type/--in`.
12
13use chrono::{DateTime, FixedOffset};
14use serde_json::Value;
15
16use crate::index::IndexRecord;
17use crate::store::{Layer, Store, StoreError};
18
19/// A composable, sidecar-backed filter over a store's records.
20///
21/// Build with [`Query::new`] and the `with_*` methods, then [`Query::execute`].
22/// Multiple [`Query::with_where`] clauses AND together (intersection over the
23/// sidecar records).
24#[derive(Debug, Clone, Default)]
25pub struct Query {
26    /// `type` predicate (`with_type`).
27    type_: Option<String>,
28    /// Layer scope (`with_layer` / `--in <layer>`).
29    layer: Option<Layer>,
30    /// `key=value` frontmatter predicates, ANDed.
31    wheres: Vec<(String, String)>,
32}
33
34impl Query {
35    /// Start a new, empty query (matches everything until narrowed).
36    pub fn new() -> Self {
37        Self::default()
38    }
39
40    /// Restrict to a single `type` (frontmatter `type` predicate).
41    ///
42    /// Setting it again replaces the previous value — a query has at most one
43    /// `type` (a record carries exactly one `type`, so two types would never
44    /// intersect).
45    pub fn with_type(mut self, type_: &str) -> Self {
46        self.type_ = Some(type_.to_string());
47        self
48    }
49
50    /// Restrict to one layer (`Sources` / `Records` / `Wiki`) — scopes which
51    /// sidecars' records survive. Setting it again replaces the previous layer.
52    pub fn with_layer(mut self, layer: Layer) -> Self {
53        self.layer = Some(layer);
54        self
55    }
56
57    /// Add a `key=value` frontmatter predicate; chains as AND with any others
58    /// (intersection over the sidecar records). Repeating the same `key` adds a
59    /// second clause — both must hold — rather than replacing the first.
60    pub fn with_where(mut self, key: &str, value: &str) -> Self {
61        self.wheres.push((key.to_string(), value.to_string()));
62        self
63    }
64
65    /// Resolve the query against the relevant type-folder `index.jsonl`
66    /// sidecar(s) and return the matching [`IndexRecord`]s — complete, one
67    /// sequential read per type-folder, no whole-store walk.
68    ///
69    /// The candidate set comes from the most selective frozen sidecar reader:
70    /// [`Store::find_by_type`] when a `type` is set (one type-folder's
71    /// sidecars), otherwise [`Store::find_by_where_in`] on the first `where`
72    /// clause — and that reader is **layer-scoped** when [`with_layer`] is set,
73    /// so a `--where`-only query reads only the named layer's sidecars instead
74    /// of the whole store (O(entities-in-layer), the interactive-loop contract).
75    /// The layer scope and every remaining predicate are then applied in memory
76    /// over the returned records — no extra sidecar reads, no walk.
77    ///
78    /// [`with_layer`]: Query::with_layer
79    ///
80    /// A query that constrains neither `type` nor any `where` clause selects no
81    /// sidecar (a bare or layer-only query has no walk-free candidate set under
82    /// the sidecar API) and returns an empty result; the CLI always supplies a
83    /// `--type` or a `--where`.
84    pub fn execute(&self, store: &Store) -> Result<Vec<IndexRecord>, StoreError> {
85        // Pick the candidate set from the cheapest frozen sidecar reader, and
86        // remember which predicates that reader has already satisfied so the
87        // in-memory pass doesn't re-test them.
88        let (candidates, type_done, where_done) = if let Some(type_) = &self.type_ {
89            // `find_by_type` reads the type's canonical sidecar (or, when that
90            // folder isn't indexed yet, the sidecars of just that type's layer —
91            // never the whole store); every record it returns already has the
92            // right `type`.
93            (store.find_by_type(type_)?, true, 0)
94        } else if let Some((key, value)) = self.wheres.first() {
95            // No type to scope on: let the first `where` clause pick the
96            // sidecars and pre-filter. `self.layer` (when set) confines the
97            // sidecar walk to that layer's subtree, so a `--where`-only query
98            // is O(entities-in-layer), not O(store records) — the in-memory
99            // layer filter below then becomes a no-op for this path. The
100            // remaining clauses AND in memory.
101            (store.find_by_where_in(key, value, self.layer)?, false, 1)
102        } else {
103            // Nothing selects a sidecar: no walk-free candidate set exists.
104            return Ok(Vec::new());
105        };
106
107        Ok(self.filter_candidates(candidates, type_done, where_done))
108    }
109
110    /// Apply the in-memory predicate pass over a candidate set returned by a
111    /// sidecar reader: the `type` predicate (unless `type_already_applied`,
112    /// because [`Store::find_by_type`] guarantees it), the [`with_layer`] scope,
113    /// and every remaining `where` clause (skipping the first
114    /// `wheres_already_applied`, which [`Store::find_by_where`] pre-filtered).
115    /// All surviving predicates AND together.
116    ///
117    /// Split out from [`Query::execute`] so the composition is exercisable over
118    /// hand-built [`IndexRecord`]s independent of the sidecar I/O.
119    ///
120    /// [`with_layer`]: Query::with_layer
121    fn filter_candidates(
122        &self,
123        candidates: Vec<IndexRecord>,
124        type_already_applied: bool,
125        wheres_already_applied: usize,
126    ) -> Vec<IndexRecord> {
127        candidates
128            .into_iter()
129            .filter(|record| {
130                if !type_already_applied {
131                    if let Some(type_) = &self.type_ {
132                        if record.type_ != *type_ {
133                            return false;
134                        }
135                    }
136                }
137                if let Some(layer) = self.layer {
138                    if !record_in_layer(record, layer) {
139                        return false;
140                    }
141                }
142                self.wheres
143                    .iter()
144                    .skip(wheres_already_applied)
145                    .all(|(key, value)| record_matches_where(record, key, value))
146            })
147            .collect()
148    }
149}
150
151/// True if `record`'s store-relative `path` lives under `layer`'s top-level
152/// folder (`sources/` / `records/` / `wiki/`). The sidecar readers can return
153/// records from any layer (a `type` folder name is not unique across layers),
154/// so a `with_layer` scope is enforced here on the record's path.
155fn record_in_layer(record: &IndexRecord, layer: Layer) -> bool {
156    record
157        .path
158        .components()
159        .next()
160        .and_then(|c| c.as_os_str().to_str())
161        == Some(layer_dir_name(layer))
162}
163
164/// The top-level folder name for a [`Layer`] (`"sources"` / `"records"` /
165/// `"wiki"`). Kept local so the layer-scope filter is self-contained and does
166/// not couple `query` to the store-walk module's dir-name helpers.
167fn layer_dir_name(layer: Layer) -> &'static str {
168    match layer {
169        Layer::Sources => "sources",
170        Layer::Records => "records",
171        Layer::Wiki => "wiki",
172    }
173}
174
175/// True if `record` satisfies a single `key=value` frontmatter predicate.
176///
177/// The universal-contract keys map to their typed [`IndexRecord`] columns
178/// (`type`, `summary`, `created`, `updated`, plus the list-valued `tags` /
179/// `links` which match when `value` is one of the members); every other key is
180/// looked up in [`IndexRecord::fields`] and compared with
181/// [`json_value_matches`]. An absent key never matches.
182fn record_matches_where(record: &IndexRecord, key: &str, value: &str) -> bool {
183    match key {
184        "type" => record.type_ == value,
185        "summary" => record.summary == value,
186        "path" => record.path.to_str() == Some(value),
187        // List-valued columns match on membership: `tags=urgent` is true when
188        // `urgent` is one of the file's tags.
189        "tags" => record.tags.iter().any(|t| t == value),
190        "links" => record.links.iter().any(|l| l == value),
191        // Timestamps compare as instants (both sides parsed as RFC3339) so a
192        // `Z`-form query matches a `+00:00`-form stored value and vice versa.
193        // A plain string compare of `to_rfc3339()` would disagree with the
194        // `Store::find_by_where_in` sidecar pre-filter — which this in-memory
195        // pass re-runs over — and silently drop real matches.
196        "created" => timestamp_value_matches(record.created, value),
197        "updated" => timestamp_value_matches(record.updated, value),
198        _ => record
199            .fields
200            .get(key)
201            .is_some_and(|v| json_value_matches(v, value)),
202    }
203}
204
205/// Compare a sidecar [`Value`] against the string `value` from a `key=value`
206/// predicate. The CLI surface is all strings, so matching is defined against
207/// the value's natural string form:
208///
209/// - a string matches when equal;
210/// - a number matches when its canonical render equals `value` (so `42` matches
211///   `"42"`, and `12.5` matches `"12.5"`);
212/// - a bool matches `"true"` / `"false"`;
213/// - an array matches when **any** element matches (so a list-valued custom
214///   field behaves like `tags` — membership, not whole-list equality);
215/// - `null` never matches (a present-but-null field is treated as no value).
216fn json_value_matches(value: &Value, target: &str) -> bool {
217    match value {
218        Value::String(s) => s == target,
219        Value::Number(n) => n.to_string() == target,
220        Value::Bool(b) => b.to_string() == target,
221        Value::Array(items) => items.iter().any(|item| json_value_matches(item, target)),
222        Value::Null => false,
223        // Objects have no scalar form a `key=value` predicate can match.
224        Value::Object(_) => false,
225    }
226}
227
228/// Match a stored instant against a `key=value` predicate by parsing `value` as
229/// RFC3339 and comparing instants. A plain string compare of `to_rfc3339()`
230/// (which always emits the numeric `+00:00` offset, never `Z`) would reject a
231/// `…Z` query against the identical moment, and disagree with the sidecar
232/// pre-filter [`Store::find_by_where_in`], silently dropping real matches.
233fn timestamp_value_matches(stored: Option<DateTime<FixedOffset>>, value: &str) -> bool {
234    match (stored, DateTime::parse_from_rfc3339(value)) {
235        (Some(stored), Ok(queried)) => stored == queried,
236        _ => false,
237    }
238}
239
240#[cfg(test)]
241mod tests {
242    use super::*;
243    use crate::store::Store;
244    use std::fs;
245    use std::path::PathBuf;
246    use tempfile::TempDir;
247
248    // ── Fixtures ─────────────────────────────────────────────────────────────
249
250    /// Build an [`IndexRecord`] with the given store-relative path, type, and
251    /// extra (`fields`) frontmatter, leaving the timestamp/list columns empty.
252    /// Tests that need `tags`/`links`/`created` set them on the returned value.
253    fn rec(path: &str, type_: &str, fields: &[(&str, Value)]) -> IndexRecord {
254        IndexRecord {
255            path: PathBuf::from(path),
256            type_: type_.to_string(),
257            summary: format!("summary of {path}"),
258            tags: Vec::new(),
259            links: Vec::new(),
260            created: None,
261            updated: None,
262            fields: fields
263                .iter()
264                .map(|(k, v)| (k.to_string(), v.clone()))
265                .collect(),
266        }
267    }
268
269    /// Serialize one record to a single JSONL line (what a real sidecar holds).
270    fn jsonl_line(record: &IndexRecord) -> String {
271        serde_json::to_string(record).expect("serialize IndexRecord")
272    }
273
274    /// A minimal but valid `DB.md` marker (a `---` frontmatter block, which
275    /// `parse_db_md` requires; the body is empty so the config is the default).
276    const DB_MD: &str = "---\ntype: db-md\n---\n\n# Test store\n";
277
278    /// Write a temp store: a `DB.md` marker plus an `index.jsonl` sidecar at
279    /// each `(store-relative folder, records)` entry. Returns the temp dir
280    /// (kept alive by the caller) and the opened [`Store`].
281    fn store_with_sidecars(sidecars: &[(&str, &[IndexRecord])]) -> (TempDir, Store) {
282        let dir = TempDir::new().expect("temp dir");
283        let root = dir.path();
284        fs::write(root.join("DB.md"), DB_MD).expect("write DB.md");
285
286        for (folder, records) in sidecars {
287            let folder_abs = root.join(folder);
288            fs::create_dir_all(&folder_abs).expect("create type folder");
289            let body: String = records
290                .iter()
291                .map(|r| format!("{}\n", jsonl_line(r)))
292                .collect();
293            fs::write(folder_abs.join("index.jsonl"), body).expect("write index.jsonl");
294        }
295
296        let store = Store::open(root).expect("open store");
297        (dir, store)
298    }
299
300    /// The set of store-relative path strings in a result set, for order-
301    /// independent assertions.
302    fn paths(records: &[IndexRecord]) -> std::collections::BTreeSet<String> {
303        records
304            .iter()
305            .map(|r| r.path.to_string_lossy().into_owned())
306            .collect()
307    }
308
309    fn path_set(items: &[&str]) -> std::collections::BTreeSet<String> {
310        items.iter().map(|s| s.to_string()).collect()
311    }
312
313    // ── Builder state ────────────────────────────────────────────────────────
314
315    #[test]
316    fn builder_accumulates_predicates() {
317        let q = Query::new()
318            .with_type("contact")
319            .with_layer(Layer::Records)
320            .with_where("company", "acme")
321            .with_where("status", "active");
322
323        assert_eq!(q.type_.as_deref(), Some("contact"));
324        assert_eq!(q.layer, Some(Layer::Records));
325        assert_eq!(
326            q.wheres,
327            vec![
328                ("company".to_string(), "acme".to_string()),
329                ("status".to_string(), "active".to_string()),
330            ],
331            "each with_where appends a distinct clause"
332        );
333    }
334
335    #[test]
336    fn with_type_and_with_layer_replace_rather_than_stack() {
337        let q = Query::new()
338            .with_type("contact")
339            .with_type("company")
340            .with_layer(Layer::Sources)
341            .with_layer(Layer::Wiki);
342        assert_eq!(q.type_.as_deref(), Some("company"));
343        assert_eq!(q.layer, Some(Layer::Wiki));
344    }
345
346    #[test]
347    fn repeated_with_where_same_key_keeps_both_clauses() {
348        // Two clauses on the same key must both be retained (range-style AND),
349        // not collapsed to the last one.
350        let q = Query::new()
351            .with_where("updated", "2026-01-01T00:00:00+00:00")
352            .with_where("updated", "2026-02-01T00:00:00+00:00");
353        assert_eq!(q.wheres.len(), 2);
354    }
355
356    // ── execute: real sidecars on disk ───────────────────────────────────────
357
358    #[test]
359    fn execute_with_type_returns_only_that_types_folder() {
360        let contacts = [
361            rec("records/contacts/sarah.md", "contact", &[]),
362            rec("records/contacts/mara.md", "contact", &[]),
363        ];
364        let companies = [rec("records/companies/acme.md", "company", &[])];
365        let (_dir, store) = store_with_sidecars(&[
366            ("records/contacts", &contacts),
367            ("records/companies", &companies),
368        ]);
369
370        let got = Query::new().with_type("contact").execute(&store).unwrap();
371
372        assert_eq!(
373            paths(&got),
374            path_set(&["records/contacts/sarah.md", "records/contacts/mara.md"]),
375            "a type query reads its own type-folder sidecar and excludes other types"
376        );
377    }
378
379    #[test]
380    fn execute_type_plus_where_intersects_on_a_custom_field() {
381        let contacts = [
382            rec(
383                "records/contacts/sarah.md",
384                "contact",
385                &[("company", Value::String("acme".into()))],
386            ),
387            rec(
388                "records/contacts/mara.md",
389                "contact",
390                &[("company", Value::String("globex".into()))],
391            ),
392            rec("records/contacts/no-company.md", "contact", &[]),
393        ];
394        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
395
396        let got = Query::new()
397            .with_type("contact")
398            .with_where("company", "acme")
399            .execute(&store)
400            .unwrap();
401
402        assert_eq!(
403            paths(&got),
404            path_set(&["records/contacts/sarah.md"]),
405            "the where clause narrows the type's records to the matching field; \
406             a record missing the key does not match"
407        );
408    }
409
410    #[test]
411    fn execute_multiple_where_clauses_and_together() {
412        let contacts = [
413            rec(
414                "records/contacts/a.md",
415                "contact",
416                &[
417                    ("company", Value::String("acme".into())),
418                    ("status", Value::String("active".into())),
419                ],
420            ),
421            rec(
422                "records/contacts/b.md",
423                "contact",
424                &[
425                    ("company", Value::String("acme".into())),
426                    ("status", Value::String("churned".into())),
427                ],
428            ),
429            rec(
430                "records/contacts/c.md",
431                "contact",
432                &[
433                    ("company", Value::String("globex".into())),
434                    ("status", Value::String("active".into())),
435                ],
436            ),
437        ];
438        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
439
440        let got = Query::new()
441            .with_type("contact")
442            .with_where("company", "acme")
443            .with_where("status", "active")
444            .execute(&store)
445            .unwrap();
446
447        // Only `a` satisfies BOTH clauses. If the clauses were OR'd, `b` and `c`
448        // would leak in.
449        assert_eq!(paths(&got), path_set(&["records/contacts/a.md"]));
450    }
451
452    #[test]
453    fn execute_where_without_type_reads_across_sidecars() {
454        // `find_by_where` scans every sidecar; the same `domain` value lives in
455        // both a contact and a company record, and both come back.
456        let contacts = [rec(
457            "records/contacts/sarah.md",
458            "contact",
459            &[("domain", Value::String("acme.com".into()))],
460        )];
461        let companies = [
462            rec(
463                "records/companies/acme.md",
464                "company",
465                &[("domain", Value::String("acme.com".into()))],
466            ),
467            rec(
468                "records/companies/globex.md",
469                "company",
470                &[("domain", Value::String("globex.com".into()))],
471            ),
472        ];
473        let (_dir, store) = store_with_sidecars(&[
474            ("records/contacts", &contacts),
475            ("records/companies", &companies),
476        ]);
477
478        let got = Query::new()
479            .with_where("domain", "acme.com")
480            .execute(&store)
481            .unwrap();
482
483        assert_eq!(
484            paths(&got),
485            path_set(&["records/contacts/sarah.md", "records/companies/acme.md"]),
486            "a where-only query matches the field across every type-folder sidecar"
487        );
488    }
489
490    #[test]
491    fn execute_with_layer_scopes_by_path() {
492        // Same custom field value present in two layers; the layer scope must
493        // keep only the records under the named layer folder.
494        let source_recs = [rec(
495            "sources/notes/n1.md",
496            "note",
497            &[("topic", Value::String("billing".into()))],
498        )];
499        let record_recs = [rec(
500            "records/notes/n2.md",
501            "note",
502            &[("topic", Value::String("billing".into()))],
503        )];
504        let (_dir, store) = store_with_sidecars(&[
505            ("sources/notes", &source_recs),
506            ("records/notes", &record_recs),
507        ]);
508
509        // Without a layer scope, both layers' records match.
510        let unscoped = Query::new()
511            .with_where("topic", "billing")
512            .execute(&store)
513            .unwrap();
514        assert_eq!(
515            paths(&unscoped),
516            path_set(&["sources/notes/n1.md", "records/notes/n2.md"]),
517        );
518
519        // Scoped to Sources, only the sources-layer record survives.
520        let scoped = Query::new()
521            .with_where("topic", "billing")
522            .with_layer(Layer::Sources)
523            .execute(&store)
524            .unwrap();
525        assert_eq!(
526            paths(&scoped),
527            path_set(&["sources/notes/n1.md"]),
528            "with_layer(Sources) drops the records/-layer record"
529        );
530    }
531
532    #[test]
533    fn execute_where_only_with_layer_confines_sidecar_io_not_just_result() {
534        // The O(entities-in-layer) contract for a `--where`-only query (no
535        // `--type`): `--in <layer>` must scope the *sidecar read*, not merely
536        // filter the result after a whole-store read. Proven structurally — a
537        // corrupt sidecar in another layer would make the read error if it were
538        // touched, so a layer-scoped query that SUCCEEDS is proof the
539        // out-of-scope layer's I/O never happened.
540        let dir = TempDir::new().unwrap();
541        let root = dir.path();
542        fs::write(root.join("DB.md"), DB_MD).unwrap();
543
544        // In-scope layer: a valid sidecar with the matching record.
545        let records_dir = root.join("records/contacts");
546        fs::create_dir_all(&records_dir).unwrap();
547        let match_rec = rec(
548            "records/contacts/sarah.md",
549            "contact",
550            &[("domain", Value::String("acme.com".into()))],
551        );
552        fs::write(
553            records_dir.join("index.jsonl"),
554            format!("{}\n", jsonl_line(&match_rec)),
555        )
556        .unwrap();
557
558        // Out-of-scope layer: a CORRUPT sidecar. If a `--in records` query read
559        // it, `read_type_index` would error.
560        let sources_dir = root.join("sources/emails");
561        fs::create_dir_all(&sources_dir).unwrap();
562        fs::write(sources_dir.join("index.jsonl"), "{ not valid json }\n").unwrap();
563
564        let store = Store::open(root).unwrap();
565
566        // Scoped to records: succeeds and returns only the records-layer match,
567        // because the corrupt sources sidecar was never walked.
568        let scoped = Query::new()
569            .with_where("domain", "acme.com")
570            .with_layer(Layer::Records)
571            .execute(&store)
572            .expect("a records-scoped where query must not read the sources sidecar");
573        assert_eq!(paths(&scoped), path_set(&["records/contacts/sarah.md"]));
574
575        // Unscoped: the same query DOES walk every layer and trips over the
576        // corrupt sidecar — proving the corrupt file is real and that only the
577        // layer scope spared the scoped read from reading it.
578        let unscoped = Query::new()
579            .with_where("domain", "acme.com")
580            .execute(&store);
581        assert!(
582            unscoped.is_err(),
583            "an unscoped where query reads every sidecar, including the corrupt one"
584        );
585    }
586
587    #[test]
588    fn execute_full_composition_type_layer_where() {
589        let contacts = [
590            rec(
591                "records/contacts/match.md",
592                "contact",
593                &[("city", Value::String("denver".into()))],
594            ),
595            rec(
596                "records/contacts/wrong-city.md",
597                "contact",
598                &[("city", Value::String("austin".into()))],
599            ),
600        ];
601        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
602
603        let got = Query::new()
604            .with_type("contact")
605            .with_layer(Layer::Records)
606            .with_where("city", "denver")
607            .execute(&store)
608            .unwrap();
609        assert_eq!(paths(&got), path_set(&["records/contacts/match.md"]));
610
611        // The same query scoped to the wrong layer yields nothing, proving the
612        // layer predicate is live in the composed path.
613        let wrong_layer = Query::new()
614            .with_type("contact")
615            .with_layer(Layer::Wiki)
616            .with_where("city", "denver")
617            .execute(&store)
618            .unwrap();
619        assert!(wrong_layer.is_empty());
620    }
621
622    #[test]
623    fn execute_empty_query_selects_no_sidecar() {
624        // A query with neither a type nor a where clause has no walk-free
625        // candidate set and must return empty WITHOUT touching the store walk.
626        let contacts = [rec("records/contacts/sarah.md", "contact", &[])];
627        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
628
629        let got = Query::new().execute(&store).unwrap();
630        assert!(
631            got.is_empty(),
632            "an unconstrained query resolves to empty, not to every record"
633        );
634
635        // A layer-only query likewise selects no sidecar (no type/where to pick
636        // one), so it is empty too — even though records exist in that layer.
637        let layer_only = Query::new()
638            .with_layer(Layer::Records)
639            .execute(&store)
640            .unwrap();
641        assert!(layer_only.is_empty());
642    }
643
644    #[test]
645    fn execute_tag_membership_via_where() {
646        let mut urgent = rec("records/tasks/t1.md", "task", &[]);
647        urgent.tags = vec!["urgent".into(), "ops".into()];
648        let mut calm = rec("records/tasks/t2.md", "task", &[]);
649        calm.tags = vec!["ops".into()];
650        let recs = [urgent, calm];
651        let (_dir, store) = store_with_sidecars(&[("records/tasks", &recs)]);
652
653        let got = Query::new()
654            .with_type("task")
655            .with_where("tags", "urgent")
656            .execute(&store)
657            .unwrap();
658        assert_eq!(
659            paths(&got),
660            path_set(&["records/tasks/t1.md"]),
661            "tags match on membership: only the record carrying the tag matches"
662        );
663    }
664
665    #[test]
666    fn execute_matches_numeric_and_bool_fields_from_string_predicate() {
667        let recs = [
668            rec(
669                "records/invoices/paid.md",
670                "invoice",
671                &[
672                    ("amount", Value::Number(42.into())),
673                    ("paid", Value::Bool(true)),
674                ],
675            ),
676            rec(
677                "records/invoices/unpaid.md",
678                "invoice",
679                &[
680                    ("amount", Value::Number(99.into())),
681                    ("paid", Value::Bool(false)),
682                ],
683            ),
684        ];
685        let (_dir, store) = store_with_sidecars(&[("records/invoices", &recs)]);
686
687        let by_amount = Query::new()
688            .with_type("invoice")
689            .with_where("amount", "42")
690            .execute(&store)
691            .unwrap();
692        assert_eq!(
693            paths(&by_amount),
694            path_set(&["records/invoices/paid.md"]),
695            "a JSON number matches the string form of the predicate"
696        );
697
698        let by_paid = Query::new()
699            .with_type("invoice")
700            .with_where("paid", "true")
701            .execute(&store)
702            .unwrap();
703        assert_eq!(
704            paths(&by_paid),
705            path_set(&["records/invoices/paid.md"]),
706            "a JSON bool matches \"true\"/\"false\""
707        );
708    }
709
710    #[test]
711    fn execute_honors_last_write_wins_in_sidecar() {
712        // Two JSONL lines for the same path: the later supersedes the earlier
713        // (read_type_index applies last-write-wins). A query on the superseding
714        // field must match, and one on the superseded field must not.
715        let dir = TempDir::new().unwrap();
716        let root = dir.path();
717        fs::write(root.join("DB.md"), DB_MD).unwrap();
718        let folder = root.join("records/contacts");
719        fs::create_dir_all(&folder).unwrap();
720
721        let old = rec(
722            "records/contacts/sarah.md",
723            "contact",
724            &[("status", Value::String("lead".into()))],
725        );
726        let new = rec(
727            "records/contacts/sarah.md",
728            "contact",
729            &[("status", Value::String("customer".into()))],
730        );
731        fs::write(
732            folder.join("index.jsonl"),
733            format!("{}\n{}\n", jsonl_line(&old), jsonl_line(&new)),
734        )
735        .unwrap();
736        let store = Store::open(root).unwrap();
737
738        let superseding = Query::new()
739            .with_type("contact")
740            .with_where("status", "customer")
741            .execute(&store)
742            .unwrap();
743        assert_eq!(superseding.len(), 1, "the superseding line's value matches");
744
745        let superseded = Query::new()
746            .with_type("contact")
747            .with_where("status", "lead")
748            .execute(&store)
749            .unwrap();
750        assert!(
751            superseded.is_empty(),
752            "the superseded line's value no longer matches after last-write-wins"
753        );
754    }
755
756    #[test]
757    fn execute_returns_full_records_not_just_paths() {
758        // The contract returns full IndexRecords straight from the sidecar:
759        // summary, tags, links, and fields must survive the round-trip.
760        let mut r = rec(
761            "records/contacts/sarah.md",
762            "contact",
763            &[("company", Value::String("acme".into()))],
764        );
765        r.summary = "Renewal champion".into();
766        r.tags = vec!["vip".into()];
767        r.links = vec!["wiki/people/sarah-chen.md".into()];
768        let recs = [r];
769        let (_dir, store) = store_with_sidecars(&[("records/contacts", &recs)]);
770
771        let got = Query::new().with_type("contact").execute(&store).unwrap();
772        assert_eq!(got.len(), 1);
773        let only = &got[0];
774        assert_eq!(only.summary, "Renewal champion");
775        assert_eq!(only.tags, vec!["vip".to_string()]);
776        assert_eq!(only.links, vec!["wiki/people/sarah-chen.md".to_string()]);
777        assert_eq!(
778            only.fields.get("company"),
779            Some(&Value::String("acme".into())),
780            "type-specific fields come back verbatim for on-demand use"
781        );
782    }
783
784    // ── Pure matcher logic (no store I/O) ────────────────────────────────────
785
786    #[test]
787    fn record_matches_where_on_typed_columns() {
788        let mut r = rec("records/contacts/x.md", "contact", &[]);
789        r.summary = "hello".into();
790
791        assert!(record_matches_where(&r, "type", "contact"));
792        assert!(!record_matches_where(&r, "type", "company"));
793        assert!(record_matches_where(&r, "summary", "hello"));
794        assert!(!record_matches_where(&r, "summary", "goodbye"));
795        assert!(record_matches_where(&r, "path", "records/contacts/x.md"));
796        assert!(!record_matches_where(&r, "path", "records/contacts/y.md"));
797    }
798
799    #[test]
800    fn record_matches_where_on_timestamps_uses_rfc3339() {
801        let mut r = rec("records/meetings/m.md", "meeting", &[]);
802        let ts = chrono::DateTime::parse_from_rfc3339("2026-05-29T12:00:00+00:00").unwrap();
803        r.created = Some(ts);
804
805        assert!(record_matches_where(
806            &r,
807            "created",
808            "2026-05-29T12:00:00+00:00"
809        ));
810        assert!(!record_matches_where(
811            &r,
812            "created",
813            "2026-05-29T13:00:00+00:00"
814        ));
815        // `updated` is unset → never matches, even the same instant.
816        assert!(!record_matches_where(
817            &r,
818            "updated",
819            "2026-05-29T12:00:00+00:00"
820        ));
821    }
822
823    #[test]
824    fn record_matches_where_timestamp_z_and_offset_spellings_are_equal() {
825        // Regression: the in-memory filter compared `to_rfc3339()` (always the
826        // `+00:00` form) to the raw predicate string, so a `Z`-spelled query of
827        // the identical instant silently failed — and disagreed with the
828        // `Store::find_by_where_in` sidecar pre-filter (instant-based),
829        // dropping real matches. Both spellings must compare equal now.
830        let mut stored_z = rec("records/meetings/m.md", "meeting", &[]);
831        stored_z.created =
832            Some(chrono::DateTime::parse_from_rfc3339("2026-05-29T12:00:00Z").unwrap());
833        assert!(record_matches_where(
834            &stored_z,
835            "created",
836            "2026-05-29T12:00:00Z"
837        ));
838        assert!(record_matches_where(
839            &stored_z,
840            "created",
841            "2026-05-29T12:00:00+00:00"
842        ));
843
844        // Stored as `+00:00`, queried as `Z` — this is the spelling pair that
845        // failed before the fix.
846        let mut stored_offset = rec("records/meetings/n.md", "meeting", &[]);
847        stored_offset.created =
848            Some(chrono::DateTime::parse_from_rfc3339("2026-05-29T12:00:00+00:00").unwrap());
849        assert!(record_matches_where(
850            &stored_offset,
851            "created",
852            "2026-05-29T12:00:00Z"
853        ));
854
855        // A different instant still does not match; an unparseable value is false.
856        assert!(!record_matches_where(
857            &stored_z,
858            "created",
859            "2026-05-29T13:00:00Z"
860        ));
861        assert!(!record_matches_where(
862            &stored_z,
863            "created",
864            "not-a-timestamp"
865        ));
866    }
867
868    #[test]
869    fn record_matches_where_absent_field_is_false() {
870        let r = rec("records/contacts/x.md", "contact", &[]);
871        assert!(
872            !record_matches_where(&r, "nonexistent", "anything"),
873            "an absent frontmatter key never matches"
874        );
875    }
876
877    #[test]
878    fn json_value_matches_covers_scalars_and_arrays() {
879        assert!(json_value_matches(&Value::String("acme".into()), "acme"));
880        assert!(!json_value_matches(&Value::String("acme".into()), "globex"));
881
882        assert!(json_value_matches(&Value::Number(42.into()), "42"));
883        assert!(!json_value_matches(&Value::Number(42.into()), "43"));
884
885        assert!(json_value_matches(&Value::Bool(true), "true"));
886        assert!(json_value_matches(&Value::Bool(false), "false"));
887        assert!(!json_value_matches(&Value::Bool(true), "false"));
888
889        let arr = Value::Array(vec![Value::String("a".into()), Value::String("b".into())]);
890        assert!(json_value_matches(&arr, "b"), "array matches on membership");
891        assert!(!json_value_matches(&arr, "c"));
892    }
893
894    #[test]
895    fn json_value_matches_null_and_object_never_match() {
896        assert!(!json_value_matches(&Value::Null, ""));
897        assert!(!json_value_matches(&Value::Null, "null"));
898        let obj = serde_json::json!({"k": "v"});
899        assert!(!json_value_matches(&obj, "v"));
900    }
901
902    #[test]
903    fn record_in_layer_keys_off_first_path_component() {
904        let s = rec("sources/emails/e.md", "email", &[]);
905        let r = rec("records/contacts/c.md", "contact", &[]);
906        let w = rec("wiki/people/p.md", "wiki-page", &[]);
907
908        assert!(record_in_layer(&s, Layer::Sources));
909        assert!(!record_in_layer(&s, Layer::Records));
910        assert!(record_in_layer(&r, Layer::Records));
911        assert!(!record_in_layer(&r, Layer::Wiki));
912        assert!(record_in_layer(&w, Layer::Wiki));
913        assert!(!record_in_layer(&w, Layer::Sources));
914    }
915
916    #[test]
917    fn filter_candidates_skips_already_applied_where_clause() {
918        // Simulate the find_by_where path: the first clause is "already applied"
919        // by the sidecar reader, so filter_candidates must skip it and only
920        // enforce the remaining clause. A record satisfying only the (skipped)
921        // first clause but NOT the second must still be dropped.
922        let q = Query::new()
923            .with_where("company", "acme")
924            .with_where("status", "active");
925
926        let keep = rec(
927            "records/contacts/keep.md",
928            "contact",
929            &[
930                ("company", Value::String("acme".into())),
931                ("status", Value::String("active".into())),
932            ],
933        );
934        let drop = rec(
935            "records/contacts/drop.md",
936            "contact",
937            &[
938                ("company", Value::String("acme".into())),
939                ("status", Value::String("churned".into())),
940            ],
941        );
942
943        let out = q.filter_candidates(vec![keep, drop], false, 1);
944        assert_eq!(
945            paths(&out),
946            path_set(&["records/contacts/keep.md"]),
947            "the second clause is enforced even when the first is pre-applied"
948        );
949    }
950
951    #[test]
952    fn filter_candidates_enforces_type_when_not_preapplied() {
953        // When the candidate set did NOT come from find_by_type (type_applied =
954        // false), filter_candidates must still drop records of the wrong type.
955        let q = Query::new().with_type("contact");
956        let contact = rec("records/contacts/c.md", "contact", &[]);
957        let company = rec("records/companies/co.md", "company", &[]);
958
959        let out = q.filter_candidates(vec![contact, company], false, 0);
960        assert_eq!(paths(&out), path_set(&["records/contacts/c.md"]));
961    }
962
963    /// Local guard: the test fixtures write sidecars under the same canonical
964    /// folders the store reader derives, so a `with_type` query finds them.
965    /// If this drifts, the integration tests above silently weaken — assert the
966    /// convention explicitly.
967    #[test]
968    fn fixture_canonical_folders_match_store_expectations() {
969        let contacts = [rec("records/contacts/x.md", "contact", &[])];
970        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
971        // `contact` records live at records/contacts/ — the same folder the
972        // fixture wrote — so the type read is non-empty.
973        let got = store.find_by_type("contact").unwrap();
974        assert_eq!(got.len(), 1, "fixture folder == store's canonical folder");
975    }
976}