Skip to main content

dbmd_core/
query.rs

//! `query` — Dataview-style filters, **sidecar-backed**.
//!
//! Resolves against the type-folder `index.jsonl` sidecar(s) via
//! [`Store::find_by_type`] / [`Store::find_by_where`] /
//! [`Store::read_type_index`] — one sequential, complete read per type-folder,
//! cold-cache-proof — **never** a walk-and-parse. Returns full
//! [`IndexRecord`]s straight from the sidecar (path + fields + summary +
//! links); the caller opens the underlying file only if it needs the body.
//!
//! Backs `dbmd search --type/--where`, `dbmd fm query`, `dbmd index query`, and
//! `dbmd graph backlinks --type/--in`.
12
13use chrono::{DateTime, FixedOffset};
14use serde_json::Value;
15
16use crate::index::IndexRecord;
17use crate::store::{Layer, Store, StoreError};
18
/// A composable, sidecar-backed filter over a store's records.
///
/// Build with [`Query::new`] and the `with_*` methods, then [`Query::execute`].
/// Multiple [`Query::with_where`] clauses AND together (intersection over the
/// sidecar records). The derived `Default` is the empty query: no `type`, no
/// layer scope, and no `where` clauses.
#[derive(Debug, Clone, Default)]
pub struct Query {
    /// `type` predicate (`with_type`); `None` leaves the type unconstrained.
    type_: Option<String>,
    /// Layer scope (`with_layer` / `--in <layer>`); `None` leaves the query
    /// unscoped.
    layer: Option<Layer>,
    /// `key=value` frontmatter predicates, ANDed, in the order they were added.
    wheres: Vec<(String, String)>,
}
33
34impl Query {
35    /// Start a new, empty query (matches everything until narrowed).
36    pub fn new() -> Self {
37        Self::default()
38    }
39
40    /// Restrict to a single `type` (frontmatter `type` predicate).
41    ///
42    /// Setting it again replaces the previous value — a query has at most one
43    /// `type` (a record carries exactly one `type`, so two types would never
44    /// intersect).
45    pub fn with_type(mut self, type_: &str) -> Self {
46        self.type_ = Some(type_.to_string());
47        self
48    }
49
50    /// Restrict to one layer (`Sources` / `Records` / `Wiki`) — scopes which
51    /// sidecars' records survive. Setting it again replaces the previous layer.
52    pub fn with_layer(mut self, layer: Layer) -> Self {
53        self.layer = Some(layer);
54        self
55    }
56
57    /// Add a `key=value` frontmatter predicate; chains as AND with any others
58    /// (intersection over the sidecar records). Repeating the same `key` adds a
59    /// second clause — both must hold — rather than replacing the first.
60    pub fn with_where(mut self, key: &str, value: &str) -> Self {
61        self.wheres.push((key.to_string(), value.to_string()));
62        self
63    }
64
65    /// Resolve the query against the relevant type-folder `index.jsonl`
66    /// sidecar(s) and return the matching [`IndexRecord`]s — complete, one
67    /// sequential read per type-folder, no whole-store walk.
68    ///
69    /// The candidate set comes from the most selective frozen sidecar reader,
70    /// always **layer-scoped** when [`with_layer`] is set, so an `--in <layer>`
71    /// scope confines the sidecar walk to that layer's subtree
72    /// (O(entities-in-layer), the interactive-loop contract):
73    ///
74    /// - a `type` predicate reads the sidecars across the named layer (or the
75    ///   whole store when unscoped) and filters by the frontmatter `type`. The
76    ///   folder layout is convention, not enforcement (SPEC), so a record whose
77    ///   `type` is filed outside that type's canonical layer — a `contact` in
78    ///   `sources/`, a custom `screenshot` that only ever lives in `sources/` —
79    ///   is still found, and `--type X --in <other-layer>` returns exactly the
80    ///   records of that type filed under the other layer rather than always
81    ///   being empty;
82    /// - otherwise the first `where` clause picks the sidecars and pre-filters,
83    ///   scoped to the layer when set;
84    /// - otherwise (a layer scope but no `type`/`where`) the layer's own
85    ///   sidecar records are the candidate set, so `--in <layer>` on its own
86    ///   enumerates that layer instead of silently returning empty.
87    ///
88    /// Every remaining predicate is then applied in memory over the returned
89    /// records — no extra sidecar reads, no walk.
90    ///
91    /// [`with_layer`]: Query::with_layer
92    ///
93    /// A fully bare query (no `type`, no `where`, no layer) constrains nothing
94    /// and has no selective candidate set, so it returns an empty result.
95    pub fn execute(&self, store: &Store) -> Result<Vec<IndexRecord>, StoreError> {
96        // Pick the candidate set from the cheapest frozen sidecar reader, and
97        // remember which predicates that reader has already satisfied so the
98        // in-memory pass doesn't re-test them.
99        let (candidates, type_done, where_done) = if self.type_.is_some() {
100            // A `type` predicate resolves over the named layer's sidecars (or
101            // the whole store when unscoped), filtering by the frontmatter
102            // `type` rather than guessing a single canonical type-folder. This
103            // keeps the result complete across every folder — and every layer —
104            // the type is filed under, so a record filed outside the type's
105            // canonical layer is still returned and `--type X --in <layer>`
106            // resolves correctly. The in-memory pass below applies the `type`
107            // (and layer, when scoped via `--in`) predicate.
108            (store.sidecar_records(self.layer)?, false, 0)
109        } else if let Some((key, value)) = self.wheres.first() {
110            // No type to scope on: let the first `where` clause pick the
111            // sidecars and pre-filter. `self.layer` (when set) confines the
112            // sidecar walk to that layer's subtree, so a `--where`-only query
113            // is O(entities-in-layer), not O(store records) — the in-memory
114            // layer filter below then becomes a no-op for this path. The
115            // remaining clauses AND in memory.
116            (store.find_by_where_in(key, value, self.layer)?, false, 1)
117        } else if let Some(layer) = self.layer {
118            // Layer-only (`--in <layer>` with no type/where): enumerate that
119            // layer's sidecar records. The in-memory layer filter below is a
120            // no-op for this path (the read is already layer-scoped).
121            (store.sidecar_records(Some(layer))?, false, 0)
122        } else {
123            // Nothing selects a sidecar: no walk-free candidate set exists.
124            return Ok(Vec::new());
125        };
126
127        Ok(self.filter_candidates(candidates, type_done, where_done))
128    }
129
130    /// Apply the in-memory predicate pass over a candidate set returned by a
131    /// sidecar reader: the `type` predicate (unless `type_already_applied`,
132    /// when a reader has already guaranteed it), the [`with_layer`] scope, and
133    /// every remaining `where` clause (skipping the first
134    /// `wheres_already_applied`, which [`Store::find_by_where_in`] pre-filtered).
135    /// All surviving predicates AND together.
136    ///
137    /// Split out from [`Query::execute`] so the composition is exercisable over
138    /// hand-built [`IndexRecord`]s independent of the sidecar I/O.
139    ///
140    /// [`with_layer`]: Query::with_layer
141    fn filter_candidates(
142        &self,
143        candidates: Vec<IndexRecord>,
144        type_already_applied: bool,
145        wheres_already_applied: usize,
146    ) -> Vec<IndexRecord> {
147        candidates
148            .into_iter()
149            .filter(|record| {
150                if !type_already_applied {
151                    if let Some(type_) = &self.type_ {
152                        if record.type_ != *type_ {
153                            return false;
154                        }
155                    }
156                }
157                if let Some(layer) = self.layer {
158                    if !record_in_layer(record, layer) {
159                        return false;
160                    }
161                }
162                self.wheres
163                    .iter()
164                    .skip(wheres_already_applied)
165                    .all(|(key, value)| record_matches_where(record, key, value))
166            })
167            .collect()
168    }
169}
170
171/// True if `record`'s store-relative `path` lives under `layer`'s top-level
172/// folder (`sources/` / `records/` / `wiki/`). The sidecar readers can return
173/// records from any layer (a `type` folder name is not unique across layers),
174/// so a `with_layer` scope is enforced here on the record's path.
175fn record_in_layer(record: &IndexRecord, layer: Layer) -> bool {
176    record
177        .path
178        .components()
179        .next()
180        .and_then(|c| c.as_os_str().to_str())
181        == Some(layer_dir_name(layer))
182}
183
184/// The top-level folder name for a [`Layer`] (`"sources"` / `"records"` /
185/// `"wiki"`). Kept local so the layer-scope filter is self-contained and does
186/// not couple `query` to the store-walk module's dir-name helpers.
187fn layer_dir_name(layer: Layer) -> &'static str {
188    match layer {
189        Layer::Sources => "sources",
190        Layer::Records => "records",
191        Layer::Wiki => "wiki",
192    }
193}
194
195/// True if `record` satisfies a single `key=value` frontmatter predicate.
196///
197/// The universal-contract keys map to their typed [`IndexRecord`] columns
198/// (`type`, `summary`, `created`, `updated`, plus the list-valued `tags` /
199/// `links` which match when `value` is one of the members); every other key is
200/// looked up in [`IndexRecord::fields`] and compared with
201/// [`json_value_matches`]. An absent key never matches.
202fn record_matches_where(record: &IndexRecord, key: &str, value: &str) -> bool {
203    match key {
204        "type" => record.type_ == value,
205        "summary" => record.summary == value,
206        "path" => record.path.to_str() == Some(value),
207        // List-valued columns match on membership: `tags=urgent` is true when
208        // `urgent` is one of the file's tags.
209        "tags" => record.tags.iter().any(|t| t == value),
210        "links" => record.links.iter().any(|l| l == value),
211        // Timestamps compare as instants (both sides parsed as RFC3339) so a
212        // `Z`-form query matches a `+00:00`-form stored value and vice versa.
213        // A plain string compare of `to_rfc3339()` would disagree with the
214        // `Store::find_by_where_in` sidecar pre-filter — which this in-memory
215        // pass re-runs over — and silently drop real matches.
216        "created" => timestamp_value_matches(record.created, value),
217        "updated" => timestamp_value_matches(record.updated, value),
218        _ => record
219            .fields
220            .get(key)
221            .is_some_and(|v| json_value_matches(v, value)),
222    }
223}
224
225/// Compare a sidecar [`Value`] against the string `value` from a `key=value`
226/// predicate. The CLI surface is all strings, so matching is defined against
227/// the value's natural string form:
228///
229/// - a string matches when equal;
230/// - a number matches when its canonical render equals `value` (so `42` matches
231///   `"42"`, and `12.5` matches `"12.5"`);
232/// - a bool matches `"true"` / `"false"`;
233/// - an array matches when **any** element matches (so a list-valued custom
234///   field behaves like `tags` — membership, not whole-list equality);
235/// - `null` never matches (a present-but-null field is treated as no value).
236fn json_value_matches(value: &Value, target: &str) -> bool {
237    match value {
238        Value::String(s) => s == target,
239        Value::Number(n) => n.to_string() == target,
240        Value::Bool(b) => b.to_string() == target,
241        Value::Array(items) => items.iter().any(|item| json_value_matches(item, target)),
242        Value::Null => false,
243        // Objects have no scalar form a `key=value` predicate can match.
244        Value::Object(_) => false,
245    }
246}
247
248/// Match a stored instant against a `key=value` predicate by parsing `value` as
249/// RFC3339 and comparing instants. A plain string compare of `to_rfc3339()`
250/// (which always emits the numeric `+00:00` offset, never `Z`) would reject a
251/// `…Z` query against the identical moment, and disagree with the sidecar
252/// pre-filter [`Store::find_by_where_in`], silently dropping real matches.
253fn timestamp_value_matches(stored: Option<DateTime<FixedOffset>>, value: &str) -> bool {
254    match (stored, DateTime::parse_from_rfc3339(value)) {
255        (Some(stored), Ok(queried)) => stored == queried,
256        _ => false,
257    }
258}
259
260#[cfg(test)]
261mod tests {
262    use super::*;
263    use crate::store::Store;
264    use std::fs;
265    use std::path::PathBuf;
266    use tempfile::TempDir;
267
268    // ── Fixtures ─────────────────────────────────────────────────────────────
269
270    /// Build an [`IndexRecord`] with the given store-relative path, type, and
271    /// extra (`fields`) frontmatter, leaving the timestamp/list columns empty.
272    /// Tests that need `tags`/`links`/`created` set them on the returned value.
273    fn rec(path: &str, type_: &str, fields: &[(&str, Value)]) -> IndexRecord {
274        IndexRecord {
275            path: PathBuf::from(path),
276            type_: type_.to_string(),
277            summary: format!("summary of {path}"),
278            tags: Vec::new(),
279            links: Vec::new(),
280            created: None,
281            updated: None,
282            fields: fields
283                .iter()
284                .map(|(k, v)| (k.to_string(), v.clone()))
285                .collect(),
286        }
287    }
288
289    /// Serialize one record to a single JSONL line (what a real sidecar holds).
290    fn jsonl_line(record: &IndexRecord) -> String {
291        serde_json::to_string(record).expect("serialize IndexRecord")
292    }
293
    /// Contents written to the `DB.md` marker file of every temp store in these
    /// tests: a `---`-delimited frontmatter block declaring `type: db-md`,
    /// followed by a one-line heading.
    ///
    /// NOTE(review): the frontmatter block is presumably what `parse_db_md`
    /// requires for `Store::open` to accept the folder — confirm against the
    /// store module.
    const DB_MD: &str = "---\ntype: db-md\n---\n\n# Test store\n";
297
298    /// Write a temp store: a `DB.md` marker plus an `index.jsonl` sidecar at
299    /// each `(store-relative folder, records)` entry. Returns the temp dir
300    /// (kept alive by the caller) and the opened [`Store`].
301    fn store_with_sidecars(sidecars: &[(&str, &[IndexRecord])]) -> (TempDir, Store) {
302        let dir = TempDir::new().expect("temp dir");
303        let root = dir.path();
304        fs::write(root.join("DB.md"), DB_MD).expect("write DB.md");
305
306        for (folder, records) in sidecars {
307            let folder_abs = root.join(folder);
308            fs::create_dir_all(&folder_abs).expect("create type folder");
309            let body: String = records
310                .iter()
311                .map(|r| format!("{}\n", jsonl_line(r)))
312                .collect();
313            fs::write(folder_abs.join("index.jsonl"), body).expect("write index.jsonl");
314        }
315
316        let store = Store::open(root).expect("open store");
317        (dir, store)
318    }
319
320    /// The set of store-relative path strings in a result set, for order-
321    /// independent assertions.
322    fn paths(records: &[IndexRecord]) -> std::collections::BTreeSet<String> {
323        records
324            .iter()
325            .map(|r| r.path.to_string_lossy().into_owned())
326            .collect()
327    }
328
329    fn path_set(items: &[&str]) -> std::collections::BTreeSet<String> {
330        items.iter().map(|s| s.to_string()).collect()
331    }
332
333    // ── Builder state ────────────────────────────────────────────────────────
334
335    #[test]
336    fn builder_accumulates_predicates() {
337        let q = Query::new()
338            .with_type("contact")
339            .with_layer(Layer::Records)
340            .with_where("company", "acme")
341            .with_where("status", "active");
342
343        assert_eq!(q.type_.as_deref(), Some("contact"));
344        assert_eq!(q.layer, Some(Layer::Records));
345        assert_eq!(
346            q.wheres,
347            vec![
348                ("company".to_string(), "acme".to_string()),
349                ("status".to_string(), "active".to_string()),
350            ],
351            "each with_where appends a distinct clause"
352        );
353    }
354
355    #[test]
356    fn with_type_and_with_layer_replace_rather_than_stack() {
357        let q = Query::new()
358            .with_type("contact")
359            .with_type("company")
360            .with_layer(Layer::Sources)
361            .with_layer(Layer::Wiki);
362        assert_eq!(q.type_.as_deref(), Some("company"));
363        assert_eq!(q.layer, Some(Layer::Wiki));
364    }
365
366    #[test]
367    fn repeated_with_where_same_key_keeps_both_clauses() {
368        // Two clauses on the same key must both be retained (range-style AND),
369        // not collapsed to the last one.
370        let q = Query::new()
371            .with_where("updated", "2026-01-01T00:00:00+00:00")
372            .with_where("updated", "2026-02-01T00:00:00+00:00");
373        assert_eq!(q.wheres.len(), 2);
374    }
375
376    // ── execute: real sidecars on disk ───────────────────────────────────────
377
378    #[test]
379    fn execute_with_type_returns_only_that_types_folder() {
380        let contacts = [
381            rec("records/contacts/sarah.md", "contact", &[]),
382            rec("records/contacts/mara.md", "contact", &[]),
383        ];
384        let companies = [rec("records/companies/acme.md", "company", &[])];
385        let (_dir, store) = store_with_sidecars(&[
386            ("records/contacts", &contacts),
387            ("records/companies", &companies),
388        ]);
389
390        let got = Query::new().with_type("contact").execute(&store).unwrap();
391
392        assert_eq!(
393            paths(&got),
394            path_set(&["records/contacts/sarah.md", "records/contacts/mara.md"]),
395            "a type query reads its own type-folder sidecar and excludes other types"
396        );
397    }
398
399    #[test]
400    fn execute_type_plus_where_intersects_on_a_custom_field() {
401        let contacts = [
402            rec(
403                "records/contacts/sarah.md",
404                "contact",
405                &[("company", Value::String("acme".into()))],
406            ),
407            rec(
408                "records/contacts/mara.md",
409                "contact",
410                &[("company", Value::String("globex".into()))],
411            ),
412            rec("records/contacts/no-company.md", "contact", &[]),
413        ];
414        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
415
416        let got = Query::new()
417            .with_type("contact")
418            .with_where("company", "acme")
419            .execute(&store)
420            .unwrap();
421
422        assert_eq!(
423            paths(&got),
424            path_set(&["records/contacts/sarah.md"]),
425            "the where clause narrows the type's records to the matching field; \
426             a record missing the key does not match"
427        );
428    }
429
430    #[test]
431    fn execute_multiple_where_clauses_and_together() {
432        let contacts = [
433            rec(
434                "records/contacts/a.md",
435                "contact",
436                &[
437                    ("company", Value::String("acme".into())),
438                    ("status", Value::String("active".into())),
439                ],
440            ),
441            rec(
442                "records/contacts/b.md",
443                "contact",
444                &[
445                    ("company", Value::String("acme".into())),
446                    ("status", Value::String("churned".into())),
447                ],
448            ),
449            rec(
450                "records/contacts/c.md",
451                "contact",
452                &[
453                    ("company", Value::String("globex".into())),
454                    ("status", Value::String("active".into())),
455                ],
456            ),
457        ];
458        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
459
460        let got = Query::new()
461            .with_type("contact")
462            .with_where("company", "acme")
463            .with_where("status", "active")
464            .execute(&store)
465            .unwrap();
466
467        // Only `a` satisfies BOTH clauses. If the clauses were OR'd, `b` and `c`
468        // would leak in.
469        assert_eq!(paths(&got), path_set(&["records/contacts/a.md"]));
470    }
471
472    #[test]
473    fn execute_where_without_type_reads_across_sidecars() {
474        // `find_by_where` scans every sidecar; the same `domain` value lives in
475        // both a contact and a company record, and both come back.
476        let contacts = [rec(
477            "records/contacts/sarah.md",
478            "contact",
479            &[("domain", Value::String("acme.com".into()))],
480        )];
481        let companies = [
482            rec(
483                "records/companies/acme.md",
484                "company",
485                &[("domain", Value::String("acme.com".into()))],
486            ),
487            rec(
488                "records/companies/globex.md",
489                "company",
490                &[("domain", Value::String("globex.com".into()))],
491            ),
492        ];
493        let (_dir, store) = store_with_sidecars(&[
494            ("records/contacts", &contacts),
495            ("records/companies", &companies),
496        ]);
497
498        let got = Query::new()
499            .with_where("domain", "acme.com")
500            .execute(&store)
501            .unwrap();
502
503        assert_eq!(
504            paths(&got),
505            path_set(&["records/contacts/sarah.md", "records/companies/acme.md"]),
506            "a where-only query matches the field across every type-folder sidecar"
507        );
508    }
509
510    #[test]
511    fn execute_with_layer_scopes_by_path() {
512        // Same custom field value present in two layers; the layer scope must
513        // keep only the records under the named layer folder.
514        let source_recs = [rec(
515            "sources/notes/n1.md",
516            "note",
517            &[("topic", Value::String("billing".into()))],
518        )];
519        let record_recs = [rec(
520            "records/notes/n2.md",
521            "note",
522            &[("topic", Value::String("billing".into()))],
523        )];
524        let (_dir, store) = store_with_sidecars(&[
525            ("sources/notes", &source_recs),
526            ("records/notes", &record_recs),
527        ]);
528
529        // Without a layer scope, both layers' records match.
530        let unscoped = Query::new()
531            .with_where("topic", "billing")
532            .execute(&store)
533            .unwrap();
534        assert_eq!(
535            paths(&unscoped),
536            path_set(&["sources/notes/n1.md", "records/notes/n2.md"]),
537        );
538
539        // Scoped to Sources, only the sources-layer record survives.
540        let scoped = Query::new()
541            .with_where("topic", "billing")
542            .with_layer(Layer::Sources)
543            .execute(&store)
544            .unwrap();
545        assert_eq!(
546            paths(&scoped),
547            path_set(&["sources/notes/n1.md"]),
548            "with_layer(Sources) drops the records/-layer record"
549        );
550    }
551
552    #[test]
553    fn execute_where_only_with_layer_confines_sidecar_io_not_just_result() {
554        // The O(entities-in-layer) contract for a `--where`-only query (no
555        // `--type`): `--in <layer>` must scope the *sidecar read*, not merely
556        // filter the result after a whole-store read. Proven structurally — a
557        // corrupt sidecar in another layer would make the read error if it were
558        // touched, so a layer-scoped query that SUCCEEDS is proof the
559        // out-of-scope layer's I/O never happened.
560        let dir = TempDir::new().unwrap();
561        let root = dir.path();
562        fs::write(root.join("DB.md"), DB_MD).unwrap();
563
564        // In-scope layer: a valid sidecar with the matching record.
565        let records_dir = root.join("records/contacts");
566        fs::create_dir_all(&records_dir).unwrap();
567        let match_rec = rec(
568            "records/contacts/sarah.md",
569            "contact",
570            &[("domain", Value::String("acme.com".into()))],
571        );
572        fs::write(
573            records_dir.join("index.jsonl"),
574            format!("{}\n", jsonl_line(&match_rec)),
575        )
576        .unwrap();
577
578        // Out-of-scope layer: a CORRUPT sidecar. If a `--in records` query read
579        // it, `read_type_index` would error.
580        let sources_dir = root.join("sources/emails");
581        fs::create_dir_all(&sources_dir).unwrap();
582        fs::write(sources_dir.join("index.jsonl"), "{ not valid json }\n").unwrap();
583
584        let store = Store::open(root).unwrap();
585
586        // Scoped to records: succeeds and returns only the records-layer match,
587        // because the corrupt sources sidecar was never walked.
588        let scoped = Query::new()
589            .with_where("domain", "acme.com")
590            .with_layer(Layer::Records)
591            .execute(&store)
592            .expect("a records-scoped where query must not read the sources sidecar");
593        assert_eq!(paths(&scoped), path_set(&["records/contacts/sarah.md"]));
594
595        // Unscoped: the same query DOES walk every layer and trips over the
596        // corrupt sidecar — proving the corrupt file is real and that only the
597        // layer scope spared the scoped read from reading it.
598        let unscoped = Query::new()
599            .with_where("domain", "acme.com")
600            .execute(&store);
601        assert!(
602            unscoped.is_err(),
603            "an unscoped where query reads every sidecar, including the corrupt one"
604        );
605    }
606
607    #[test]
608    fn execute_full_composition_type_layer_where() {
609        let contacts = [
610            rec(
611                "records/contacts/match.md",
612                "contact",
613                &[("city", Value::String("denver".into()))],
614            ),
615            rec(
616                "records/contacts/wrong-city.md",
617                "contact",
618                &[("city", Value::String("austin".into()))],
619            ),
620        ];
621        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
622
623        let got = Query::new()
624            .with_type("contact")
625            .with_layer(Layer::Records)
626            .with_where("city", "denver")
627            .execute(&store)
628            .unwrap();
629        assert_eq!(paths(&got), path_set(&["records/contacts/match.md"]));
630
631        // The same query scoped to the wrong layer yields nothing, proving the
632        // layer predicate is live in the composed path.
633        let wrong_layer = Query::new()
634            .with_type("contact")
635            .with_layer(Layer::Wiki)
636            .with_where("city", "denver")
637            .execute(&store)
638            .unwrap();
639        assert!(wrong_layer.is_empty());
640    }
641
642    #[test]
643    fn execute_bare_query_selects_no_sidecar() {
644        // A fully bare query (no type, no where, no layer) constrains nothing
645        // and has no selective candidate set, so it returns empty WITHOUT
646        // resolving to every record in the store.
647        let contacts = [rec("records/contacts/sarah.md", "contact", &[])];
648        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
649
650        let got = Query::new().execute(&store).unwrap();
651        assert!(
652            got.is_empty(),
653            "an unconstrained query resolves to empty, not to every record"
654        );
655    }
656
657    #[test]
658    fn execute_layer_only_enumerates_that_layer() {
659        // Regression (finding #47): a layer-only query (`--in <layer>` with no
660        // type/where) must enumerate that layer's records, not silently return
661        // []. Records live in two layers; the scope keeps only the named one.
662        let contacts = [rec("records/contacts/sarah.md", "contact", &[])];
663        let emails = [rec("sources/emails/e.md", "email", &[])];
664        let (_dir, store) =
665            store_with_sidecars(&[("records/contacts", &contacts), ("sources/emails", &emails)]);
666
667        let records = Query::new()
668            .with_layer(Layer::Records)
669            .execute(&store)
670            .unwrap();
671        assert_eq!(
672            paths(&records),
673            path_set(&["records/contacts/sarah.md"]),
674            "a layer-only query enumerates that layer, excluding other layers"
675        );
676
677        let sources = Query::new()
678            .with_layer(Layer::Sources)
679            .execute(&store)
680            .unwrap();
681        assert_eq!(
682            paths(&sources),
683            path_set(&["sources/emails/e.md"]),
684            "the sources-layer scope returns the sources records"
685        );
686    }
687
688    #[test]
689    fn execute_type_finds_records_filed_outside_canonical_layer() {
690        // Regression (finding #42): the folder layout is convention, not
691        // enforcement (SPEC). A `contact` filed under sources/ and a custom
692        // `screenshot` that only ever lives under sources/ must both be found
693        // by `--type`, which filters on the frontmatter type — not the type's
694        // canonical layer.
695        let source_contacts = [rec("sources/foo/jane.md", "contact", &[])];
696        let record_contacts = [rec("records/contacts/sarah.md", "contact", &[])];
697        let screenshots = [rec("sources/screenshots/shot1.md", "screenshot", &[])];
698        let (_dir, store) = store_with_sidecars(&[
699            ("sources/foo", &source_contacts),
700            ("records/contacts", &record_contacts),
701            ("sources/screenshots", &screenshots),
702        ]);
703
704        // `--type contact` returns BOTH the canonical and the non-canonical-
705        // layer record (jane under sources/, sarah under records/).
706        let contacts = Query::new().with_type("contact").execute(&store).unwrap();
707        assert_eq!(
708            paths(&contacts),
709            path_set(&["records/contacts/sarah.md", "sources/foo/jane.md"]),
710            "a type query spans every layer the type is filed under"
711        );
712
713        // A custom type that only ever lives under sources/ is still found.
714        let shots = Query::new()
715            .with_type("screenshot")
716            .execute(&store)
717            .unwrap();
718        assert_eq!(
719            paths(&shots),
720            path_set(&["sources/screenshots/shot1.md"]),
721            "a type filed entirely under sources/ is visible to --type"
722        );
723
724        // `--type contact --in sources` resolves to the sources-layer contact,
725        // not [] (the previously-dead --type/--in combination).
726        let in_sources = Query::new()
727            .with_type("contact")
728            .with_layer(Layer::Sources)
729            .execute(&store)
730            .unwrap();
731        assert_eq!(
732            paths(&in_sources),
733            path_set(&["sources/foo/jane.md"]),
734            "--type X --in <layer> returns the records of that type under the layer"
735        );
736
737        // And `--type contact --in records` keeps only the records-layer one.
738        let in_records = Query::new()
739            .with_type("contact")
740            .with_layer(Layer::Records)
741            .execute(&store)
742            .unwrap();
743        assert_eq!(
744            paths(&in_records),
745            path_set(&["records/contacts/sarah.md"]),
746            "the layer scope confines a type query to the named layer"
747        );
748    }
749
750    #[test]
751    fn execute_tag_membership_via_where() {
752        let mut urgent = rec("records/tasks/t1.md", "task", &[]);
753        urgent.tags = vec!["urgent".into(), "ops".into()];
754        let mut calm = rec("records/tasks/t2.md", "task", &[]);
755        calm.tags = vec!["ops".into()];
756        let recs = [urgent, calm];
757        let (_dir, store) = store_with_sidecars(&[("records/tasks", &recs)]);
758
759        let got = Query::new()
760            .with_type("task")
761            .with_where("tags", "urgent")
762            .execute(&store)
763            .unwrap();
764        assert_eq!(
765            paths(&got),
766            path_set(&["records/tasks/t1.md"]),
767            "tags match on membership: only the record carrying the tag matches"
768        );
769    }
770
771    #[test]
772    fn execute_matches_numeric_and_bool_fields_from_string_predicate() {
773        let recs = [
774            rec(
775                "records/invoices/paid.md",
776                "invoice",
777                &[
778                    ("amount", Value::Number(42.into())),
779                    ("paid", Value::Bool(true)),
780                ],
781            ),
782            rec(
783                "records/invoices/unpaid.md",
784                "invoice",
785                &[
786                    ("amount", Value::Number(99.into())),
787                    ("paid", Value::Bool(false)),
788                ],
789            ),
790        ];
791        let (_dir, store) = store_with_sidecars(&[("records/invoices", &recs)]);
792
793        let by_amount = Query::new()
794            .with_type("invoice")
795            .with_where("amount", "42")
796            .execute(&store)
797            .unwrap();
798        assert_eq!(
799            paths(&by_amount),
800            path_set(&["records/invoices/paid.md"]),
801            "a JSON number matches the string form of the predicate"
802        );
803
804        let by_paid = Query::new()
805            .with_type("invoice")
806            .with_where("paid", "true")
807            .execute(&store)
808            .unwrap();
809        assert_eq!(
810            paths(&by_paid),
811            path_set(&["records/invoices/paid.md"]),
812            "a JSON bool matches \"true\"/\"false\""
813        );
814    }
815
816    #[test]
817    fn execute_honors_last_write_wins_in_sidecar() {
818        // Two JSONL lines for the same path: the later supersedes the earlier
819        // (read_type_index applies last-write-wins). A query on the superseding
820        // field must match, and one on the superseded field must not.
821        let dir = TempDir::new().unwrap();
822        let root = dir.path();
823        fs::write(root.join("DB.md"), DB_MD).unwrap();
824        let folder = root.join("records/contacts");
825        fs::create_dir_all(&folder).unwrap();
826
827        let old = rec(
828            "records/contacts/sarah.md",
829            "contact",
830            &[("status", Value::String("lead".into()))],
831        );
832        let new = rec(
833            "records/contacts/sarah.md",
834            "contact",
835            &[("status", Value::String("customer".into()))],
836        );
837        fs::write(
838            folder.join("index.jsonl"),
839            format!("{}\n{}\n", jsonl_line(&old), jsonl_line(&new)),
840        )
841        .unwrap();
842        let store = Store::open(root).unwrap();
843
844        let superseding = Query::new()
845            .with_type("contact")
846            .with_where("status", "customer")
847            .execute(&store)
848            .unwrap();
849        assert_eq!(superseding.len(), 1, "the superseding line's value matches");
850
851        let superseded = Query::new()
852            .with_type("contact")
853            .with_where("status", "lead")
854            .execute(&store)
855            .unwrap();
856        assert!(
857            superseded.is_empty(),
858            "the superseded line's value no longer matches after last-write-wins"
859        );
860    }
861
862    #[test]
863    fn execute_returns_full_records_not_just_paths() {
864        // The contract returns full IndexRecords straight from the sidecar:
865        // summary, tags, links, and fields must survive the round-trip.
866        let mut r = rec(
867            "records/contacts/sarah.md",
868            "contact",
869            &[("company", Value::String("acme".into()))],
870        );
871        r.summary = "Renewal champion".into();
872        r.tags = vec!["vip".into()];
873        r.links = vec!["wiki/people/sarah-chen.md".into()];
874        let recs = [r];
875        let (_dir, store) = store_with_sidecars(&[("records/contacts", &recs)]);
876
877        let got = Query::new().with_type("contact").execute(&store).unwrap();
878        assert_eq!(got.len(), 1);
879        let only = &got[0];
880        assert_eq!(only.summary, "Renewal champion");
881        assert_eq!(only.tags, vec!["vip".to_string()]);
882        assert_eq!(only.links, vec!["wiki/people/sarah-chen.md".to_string()]);
883        assert_eq!(
884            only.fields.get("company"),
885            Some(&Value::String("acme".into())),
886            "type-specific fields come back verbatim for on-demand use"
887        );
888    }
889
890    // ── Pure matcher logic (no store I/O) ────────────────────────────────────
891
892    #[test]
893    fn record_matches_where_on_typed_columns() {
894        let mut r = rec("records/contacts/x.md", "contact", &[]);
895        r.summary = "hello".into();
896
897        assert!(record_matches_where(&r, "type", "contact"));
898        assert!(!record_matches_where(&r, "type", "company"));
899        assert!(record_matches_where(&r, "summary", "hello"));
900        assert!(!record_matches_where(&r, "summary", "goodbye"));
901        assert!(record_matches_where(&r, "path", "records/contacts/x.md"));
902        assert!(!record_matches_where(&r, "path", "records/contacts/y.md"));
903    }
904
905    #[test]
906    fn record_matches_where_on_timestamps_uses_rfc3339() {
907        let mut r = rec("records/meetings/m.md", "meeting", &[]);
908        let ts = chrono::DateTime::parse_from_rfc3339("2026-05-29T12:00:00+00:00").unwrap();
909        r.created = Some(ts);
910
911        assert!(record_matches_where(
912            &r,
913            "created",
914            "2026-05-29T12:00:00+00:00"
915        ));
916        assert!(!record_matches_where(
917            &r,
918            "created",
919            "2026-05-29T13:00:00+00:00"
920        ));
921        // `updated` is unset → never matches, even the same instant.
922        assert!(!record_matches_where(
923            &r,
924            "updated",
925            "2026-05-29T12:00:00+00:00"
926        ));
927    }
928
929    #[test]
930    fn record_matches_where_timestamp_z_and_offset_spellings_are_equal() {
931        // Regression: the in-memory filter compared `to_rfc3339()` (always the
932        // `+00:00` form) to the raw predicate string, so a `Z`-spelled query of
933        // the identical instant silently failed — and disagreed with the
934        // `Store::find_by_where_in` sidecar pre-filter (instant-based),
935        // dropping real matches. Both spellings must compare equal now.
936        let mut stored_z = rec("records/meetings/m.md", "meeting", &[]);
937        stored_z.created =
938            Some(chrono::DateTime::parse_from_rfc3339("2026-05-29T12:00:00Z").unwrap());
939        assert!(record_matches_where(
940            &stored_z,
941            "created",
942            "2026-05-29T12:00:00Z"
943        ));
944        assert!(record_matches_where(
945            &stored_z,
946            "created",
947            "2026-05-29T12:00:00+00:00"
948        ));
949
950        // Stored as `+00:00`, queried as `Z` — this is the spelling pair that
951        // failed before the fix.
952        let mut stored_offset = rec("records/meetings/n.md", "meeting", &[]);
953        stored_offset.created =
954            Some(chrono::DateTime::parse_from_rfc3339("2026-05-29T12:00:00+00:00").unwrap());
955        assert!(record_matches_where(
956            &stored_offset,
957            "created",
958            "2026-05-29T12:00:00Z"
959        ));
960
961        // A different instant still does not match; an unparseable value is false.
962        assert!(!record_matches_where(
963            &stored_z,
964            "created",
965            "2026-05-29T13:00:00Z"
966        ));
967        assert!(!record_matches_where(
968            &stored_z,
969            "created",
970            "not-a-timestamp"
971        ));
972    }
973
974    #[test]
975    fn record_matches_where_absent_field_is_false() {
976        let r = rec("records/contacts/x.md", "contact", &[]);
977        assert!(
978            !record_matches_where(&r, "nonexistent", "anything"),
979            "an absent frontmatter key never matches"
980        );
981    }
982
983    #[test]
984    fn json_value_matches_covers_scalars_and_arrays() {
985        assert!(json_value_matches(&Value::String("acme".into()), "acme"));
986        assert!(!json_value_matches(&Value::String("acme".into()), "globex"));
987
988        assert!(json_value_matches(&Value::Number(42.into()), "42"));
989        assert!(!json_value_matches(&Value::Number(42.into()), "43"));
990
991        assert!(json_value_matches(&Value::Bool(true), "true"));
992        assert!(json_value_matches(&Value::Bool(false), "false"));
993        assert!(!json_value_matches(&Value::Bool(true), "false"));
994
995        let arr = Value::Array(vec![Value::String("a".into()), Value::String("b".into())]);
996        assert!(json_value_matches(&arr, "b"), "array matches on membership");
997        assert!(!json_value_matches(&arr, "c"));
998    }
999
1000    #[test]
1001    fn json_value_matches_null_and_object_never_match() {
1002        assert!(!json_value_matches(&Value::Null, ""));
1003        assert!(!json_value_matches(&Value::Null, "null"));
1004        let obj = serde_json::json!({"k": "v"});
1005        assert!(!json_value_matches(&obj, "v"));
1006    }
1007
1008    #[test]
1009    fn record_in_layer_keys_off_first_path_component() {
1010        let s = rec("sources/emails/e.md", "email", &[]);
1011        let r = rec("records/contacts/c.md", "contact", &[]);
1012        let w = rec("wiki/people/p.md", "wiki-page", &[]);
1013
1014        assert!(record_in_layer(&s, Layer::Sources));
1015        assert!(!record_in_layer(&s, Layer::Records));
1016        assert!(record_in_layer(&r, Layer::Records));
1017        assert!(!record_in_layer(&r, Layer::Wiki));
1018        assert!(record_in_layer(&w, Layer::Wiki));
1019        assert!(!record_in_layer(&w, Layer::Sources));
1020    }
1021
1022    #[test]
1023    fn filter_candidates_skips_already_applied_where_clause() {
1024        // Simulate the find_by_where path: the first clause is "already applied"
1025        // by the sidecar reader, so filter_candidates must skip it and only
1026        // enforce the remaining clause. A record satisfying only the (skipped)
1027        // first clause but NOT the second must still be dropped.
1028        let q = Query::new()
1029            .with_where("company", "acme")
1030            .with_where("status", "active");
1031
1032        let keep = rec(
1033            "records/contacts/keep.md",
1034            "contact",
1035            &[
1036                ("company", Value::String("acme".into())),
1037                ("status", Value::String("active".into())),
1038            ],
1039        );
1040        let drop = rec(
1041            "records/contacts/drop.md",
1042            "contact",
1043            &[
1044                ("company", Value::String("acme".into())),
1045                ("status", Value::String("churned".into())),
1046            ],
1047        );
1048
1049        let out = q.filter_candidates(vec![keep, drop], false, 1);
1050        assert_eq!(
1051            paths(&out),
1052            path_set(&["records/contacts/keep.md"]),
1053            "the second clause is enforced even when the first is pre-applied"
1054        );
1055    }
1056
1057    #[test]
1058    fn filter_candidates_enforces_type_when_not_preapplied() {
1059        // When the candidate set did NOT come from find_by_type (type_applied =
1060        // false), filter_candidates must still drop records of the wrong type.
1061        let q = Query::new().with_type("contact");
1062        let contact = rec("records/contacts/c.md", "contact", &[]);
1063        let company = rec("records/companies/co.md", "company", &[]);
1064
1065        let out = q.filter_candidates(vec![contact, company], false, 0);
1066        assert_eq!(paths(&out), path_set(&["records/contacts/c.md"]));
1067    }
1068
1069    /// Local guard: the test fixtures write sidecars under the same canonical
1070    /// folders the store reader derives, so a `with_type` query finds them.
1071    /// If this drifts, the integration tests above silently weaken — assert the
1072    /// convention explicitly.
1073    #[test]
1074    fn fixture_canonical_folders_match_store_expectations() {
1075        let contacts = [rec("records/contacts/x.md", "contact", &[])];
1076        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
1077        // `contact` records live at records/contacts/ — the same folder the
1078        // fixture wrote — so the type read is non-empty.
1079        let got = store.find_by_type("contact").unwrap();
1080        assert_eq!(got.len(), 1, "fixture folder == store's canonical folder");
1081    }
1082}