Skip to main content

dbmd_core/
query.rs

1//! `query` — Dataview-style filters, **sidecar-backed**.
2//!
3//! Resolves against the type-folder `index.jsonl` sidecar(s) via
4//! [`Store::find_by_type`] / [`Store::find_by_where`] /
5//! [`Store::read_type_index`] — one sequential, complete read per type-folder,
6//! cold-cache-proof — **never** a walk-and-parse. Returns full
7//! [`IndexRecord`]s straight from the sidecar (path + fields + summary +
8//! links); the caller opens the underlying file only if it needs the body.
9//!
10//! Backs `dbmd search --type/--where`, `dbmd fm query`, `dbmd index query`, and
11//! `dbmd graph backlinks --type/--in`.
12
13use chrono::{DateTime, FixedOffset};
14use serde_json::Value;
15
16use crate::index::IndexRecord;
17use crate::store::{Layer, Store, StoreError};
18
19/// A composable, sidecar-backed filter over a store's records.
20///
21/// Build with [`Query::new`] and the `with_*` methods, then [`Query::execute`].
22/// Multiple [`Query::with_where`] clauses AND together (intersection over the
23/// sidecar records).
24#[derive(Debug, Clone, Default)]
25pub struct Query {
26    /// `type` predicate (`with_type`).
27    type_: Option<String>,
28    /// Layer scope (`with_layer` / `--in <layer>`).
29    layer: Option<Layer>,
30    /// `key=value` frontmatter predicates, ANDed.
31    wheres: Vec<(String, String)>,
32}
33
34impl Query {
35    /// Start a new, empty query (matches everything until narrowed).
36    pub fn new() -> Self {
37        Self::default()
38    }
39
40    /// Restrict to a single `type` (frontmatter `type` predicate).
41    ///
42    /// Setting it again replaces the previous value — a query has at most one
43    /// `type` (a record carries exactly one `type`, so two types would never
44    /// intersect).
45    pub fn with_type(mut self, type_: &str) -> Self {
46        self.type_ = Some(type_.to_string());
47        self
48    }
49
50    /// Restrict to one layer (`Sources` / `Records`) — scopes which
51    /// sidecars' records survive. Setting it again replaces the previous layer.
52    pub fn with_layer(mut self, layer: Layer) -> Self {
53        self.layer = Some(layer);
54        self
55    }
56
57    /// Add a `key=value` frontmatter predicate; chains as AND with any others
58    /// (intersection over the sidecar records). Repeating the same `key` adds a
59    /// second clause — both must hold — rather than replacing the first.
60    pub fn with_where(mut self, key: &str, value: &str) -> Self {
61        self.wheres.push((key.to_string(), value.to_string()));
62        self
63    }
64
65    /// Resolve the query against the relevant type-folder `index.jsonl`
66    /// sidecar(s) and return the matching [`IndexRecord`]s — complete, one
67    /// sequential read per type-folder, no whole-store walk.
68    ///
69    /// The candidate set comes from the most selective frozen sidecar reader,
70    /// always **layer-scoped** when [`with_layer`] is set, so an `--in <layer>`
71    /// scope confines the sidecar walk to that layer's subtree
72    /// (O(entities-in-layer), the interactive-loop contract):
73    ///
74    /// - a `type` predicate reads the sidecars across the named layer (or the
75    ///   whole store when unscoped) and filters by the frontmatter `type`. The
76    ///   folder layout is convention, not enforcement (SPEC), so a record whose
77    ///   `type` is filed outside that type's canonical layer — a `contact` in
78    ///   `sources/`, a custom `screenshot` that only ever lives in `sources/` —
79    ///   is still found, and `--type X --in <other-layer>` returns exactly the
80    ///   records of that type filed under the other layer rather than always
81    ///   being empty;
82    /// - otherwise the first `where` clause picks the sidecars and pre-filters,
83    ///   scoped to the layer when set;
84    /// - otherwise (a layer scope but no `type`/`where`) the layer's own
85    ///   sidecar records are the candidate set, so `--in <layer>` on its own
86    ///   enumerates that layer instead of silently returning empty.
87    ///
88    /// Every remaining predicate is then applied in memory over the returned
89    /// records — no extra sidecar reads, no walk.
90    ///
91    /// [`with_layer`]: Query::with_layer
92    ///
93    /// A fully bare query (no `type`, no `where`, no layer) constrains nothing
94    /// and has no selective candidate set, so it returns an empty result.
95    pub fn execute(&self, store: &Store) -> Result<Vec<IndexRecord>, StoreError> {
96        // Pick the candidate set from the cheapest frozen sidecar reader, and
97        // remember which predicates that reader has already satisfied so the
98        // in-memory pass doesn't re-test them.
99        let (candidates, type_done, where_done) = if self.type_.is_some() {
100            // A `type` predicate resolves over the named layer's sidecars (or
101            // the whole store when unscoped), filtering by the frontmatter
102            // `type` rather than guessing a single canonical type-folder. This
103            // keeps the result complete across every folder — and every layer —
104            // the type is filed under, so a record filed outside the type's
105            // canonical layer is still returned and `--type X --in <layer>`
106            // resolves correctly. The in-memory pass below applies the `type`
107            // (and layer, when scoped via `--in`) predicate.
108            (store.sidecar_records(self.layer)?, false, 0)
109        } else if let Some((key, value)) = self.wheres.first() {
110            // No type to scope on: let the first `where` clause pick the
111            // sidecars and pre-filter. `self.layer` (when set) confines the
112            // sidecar walk to that layer's subtree, so a `--where`-only query
113            // is O(entities-in-layer), not O(store records) — the in-memory
114            // layer filter below then becomes a no-op for this path. The
115            // remaining clauses AND in memory.
116            (store.find_by_where_in(key, value, self.layer)?, false, 1)
117        } else if let Some(layer) = self.layer {
118            // Layer-only (`--in <layer>` with no type/where): enumerate that
119            // layer's sidecar records. The in-memory layer filter below is a
120            // no-op for this path (the read is already layer-scoped).
121            (store.sidecar_records(Some(layer))?, false, 0)
122        } else {
123            // Nothing selects a sidecar: no walk-free candidate set exists.
124            return Ok(Vec::new());
125        };
126
127        Ok(self.filter_candidates(candidates, type_done, where_done))
128    }
129
130    /// Apply the in-memory predicate pass over a candidate set returned by a
131    /// sidecar reader: the `type` predicate (unless `type_already_applied`,
132    /// when a reader has already guaranteed it), the [`with_layer`] scope, and
133    /// every remaining `where` clause (skipping the first
134    /// `wheres_already_applied`, which [`Store::find_by_where_in`] pre-filtered).
135    /// All surviving predicates AND together.
136    ///
137    /// Split out from [`Query::execute`] so the composition is exercisable over
138    /// hand-built [`IndexRecord`]s independent of the sidecar I/O.
139    ///
140    /// [`with_layer`]: Query::with_layer
141    fn filter_candidates(
142        &self,
143        candidates: Vec<IndexRecord>,
144        type_already_applied: bool,
145        wheres_already_applied: usize,
146    ) -> Vec<IndexRecord> {
147        candidates
148            .into_iter()
149            .filter(|record| {
150                if !type_already_applied {
151                    if let Some(type_) = &self.type_ {
152                        if record.type_ != *type_ {
153                            return false;
154                        }
155                    }
156                }
157                if let Some(layer) = self.layer {
158                    if !record_in_layer(record, layer) {
159                        return false;
160                    }
161                }
162                self.wheres
163                    .iter()
164                    .skip(wheres_already_applied)
165                    .all(|(key, value)| record_matches_where(record, key, value))
166            })
167            .collect()
168    }
169}
170
171/// True if `record`'s store-relative `path` lives under `layer`'s top-level
172/// folder (`sources/` / `records/`). The sidecar readers can return
173/// records from any layer (a `type` folder name is not unique across layers),
174/// so a `with_layer` scope is enforced here on the record's path.
175fn record_in_layer(record: &IndexRecord, layer: Layer) -> bool {
176    record
177        .path
178        .components()
179        .next()
180        .and_then(|c| c.as_os_str().to_str())
181        == Some(layer_dir_name(layer))
182}
183
184/// The top-level folder name for a [`Layer`] (`"sources"` / `"records"` /
185/// `"wiki"`). Kept local so the layer-scope filter is self-contained and does
186/// not couple `query` to the store-walk module's dir-name helpers.
187fn layer_dir_name(layer: Layer) -> &'static str {
188    match layer {
189        Layer::Sources => "sources",
190        Layer::Records => "records",
191    }
192}
193
194/// True if `record` satisfies a single `key=value` frontmatter predicate.
195///
196/// The universal-contract keys map to their typed [`IndexRecord`] columns
197/// (`type`, `summary`, `created`, `updated`, plus the list-valued `tags` /
198/// `links` which match when `value` is one of the members); every other key is
199/// looked up in [`IndexRecord::fields`] and compared with
200/// [`json_value_matches`]. An absent key never matches.
201fn record_matches_where(record: &IndexRecord, key: &str, value: &str) -> bool {
202    match key {
203        "type" => record.type_ == value,
204        "summary" => record.summary == value,
205        "path" => record.path.to_str() == Some(value),
206        // List-valued columns match on membership: `tags=urgent` is true when
207        // `urgent` is one of the file's tags.
208        "tags" => record.tags.iter().any(|t| t == value),
209        "links" => record.links.iter().any(|l| l == value),
210        // Timestamps compare as instants (both sides parsed as RFC3339) so a
211        // `Z`-form query matches a `+00:00`-form stored value and vice versa.
212        // A plain string compare of `to_rfc3339()` would disagree with the
213        // `Store::find_by_where_in` sidecar pre-filter — which this in-memory
214        // pass re-runs over — and silently drop real matches.
215        "created" => timestamp_value_matches(record.created, value),
216        "updated" => timestamp_value_matches(record.updated, value),
217        _ => record
218            .fields
219            .get(key)
220            .is_some_and(|v| json_value_matches(v, value)),
221    }
222}
223
224/// Compare a sidecar [`Value`] against the string `value` from a `key=value`
225/// predicate. The CLI surface is all strings, so matching is defined against
226/// the value's natural string form:
227///
228/// - a string matches when equal;
229/// - a number matches when its canonical render equals `value` (so `42` matches
230///   `"42"`, and `12.5` matches `"12.5"`);
231/// - a bool matches `"true"` / `"false"`;
232/// - an array matches when **any** element matches (so a list-valued custom
233///   field behaves like `tags` — membership, not whole-list equality);
234/// - `null` never matches (a present-but-null field is treated as no value).
235fn json_value_matches(value: &Value, target: &str) -> bool {
236    match value {
237        Value::String(s) => s == target,
238        Value::Number(n) => number_matches(n, target),
239        Value::Bool(b) => b.to_string() == target,
240        Value::Array(items) => items.iter().any(|item| json_value_matches(item, target)),
241        Value::Null => false,
242        // Objects have no scalar form a `key=value` predicate can match.
243        Value::Object(_) => false,
244    }
245}
246
247/// Match a JSON number against a `key=value` predicate. A FLOAT-valued field is
248/// compared NUMERICALLY, not textually: serde_json's canonical f64 render in the
249/// sidecar discards the file's source spelling (`1234.00` -> `1234.0`, `1e3` ->
250/// `1000.0`), so a `to_string()` compare missed the spelling a human reads in
251/// the file. Restricted to the float case so a large INTEGER field keeps exact
252/// matching. MUST stay byte-identical in behavior to [`crate::store`]'s
253/// `number_matches` (the sidecar pre-filter and this in-memory post-filter have
254/// to agree, or a `--where n=…` query returns different rows than `--type X
255/// --where n=…`).
256fn number_matches(n: &serde_json::Number, target: &str) -> bool {
257    if n.to_string() == target {
258        return true;
259    }
260    if n.is_f64() {
261        if let (Some(stored), Ok(q)) = (n.as_f64(), target.parse::<f64>()) {
262            return stored == q;
263        }
264    }
265    false
266}
267
268/// Match a stored instant against a `key=value` predicate by parsing `value` as
269/// RFC3339 and comparing instants. A plain string compare of `to_rfc3339()`
270/// (which always emits the numeric `+00:00` offset, never `Z`) would reject a
271/// `…Z` query against the identical moment, and disagree with the sidecar
272/// pre-filter [`Store::find_by_where_in`], silently dropping real matches.
273fn timestamp_value_matches(stored: Option<DateTime<FixedOffset>>, value: &str) -> bool {
274    match (stored, DateTime::parse_from_rfc3339(value)) {
275        (Some(stored), Ok(queried)) => stored == queried,
276        _ => false,
277    }
278}
279
280#[cfg(test)]
281mod tests {
282    use super::*;
283    use crate::store::Store;
284    use std::fs;
285    use std::path::PathBuf;
286    use tempfile::TempDir;
287
288    // ── Fixtures ─────────────────────────────────────────────────────────────
289
290    /// Build an [`IndexRecord`] with the given store-relative path, type, and
291    /// extra (`fields`) frontmatter, leaving the timestamp/list columns empty.
292    /// Tests that need `tags`/`links`/`created` set them on the returned value.
293    fn rec(path: &str, type_: &str, fields: &[(&str, Value)]) -> IndexRecord {
294        IndexRecord {
295            path: PathBuf::from(path),
296            type_: type_.to_string(),
297            summary: format!("summary of {path}"),
298            tags: Vec::new(),
299            links: Vec::new(),
300            created: None,
301            updated: None,
302            fields: fields
303                .iter()
304                .map(|(k, v)| (k.to_string(), v.clone()))
305                .collect(),
306        }
307    }
308
309    /// Serialize one record to a single JSONL line (what a real sidecar holds).
310    fn jsonl_line(record: &IndexRecord) -> String {
311        serde_json::to_string(record).expect("serialize IndexRecord")
312    }
313
314    /// A minimal but valid `DB.md` marker (a `---` frontmatter block, which
315    /// `parse_db_md` requires; the body is empty so the config is the default).
316    const DB_MD: &str = "---\ntype: db-md\n---\n\n# Test store\n";
317
318    /// Write a temp store: a `DB.md` marker plus an `index.jsonl` sidecar at
319    /// each `(store-relative folder, records)` entry. Returns the temp dir
320    /// (kept alive by the caller) and the opened [`Store`].
321    fn store_with_sidecars(sidecars: &[(&str, &[IndexRecord])]) -> (TempDir, Store) {
322        let dir = TempDir::new().expect("temp dir");
323        let root = dir.path();
324        fs::write(root.join("DB.md"), DB_MD).expect("write DB.md");
325
326        for (folder, records) in sidecars {
327            let folder_abs = root.join(folder);
328            fs::create_dir_all(&folder_abs).expect("create type folder");
329            let body: String = records
330                .iter()
331                .map(|r| format!("{}\n", jsonl_line(r)))
332                .collect();
333            fs::write(folder_abs.join("index.jsonl"), body).expect("write index.jsonl");
334        }
335
336        let store = Store::open(root).expect("open store");
337        (dir, store)
338    }
339
340    /// The set of store-relative path strings in a result set, for order-
341    /// independent assertions.
342    fn paths(records: &[IndexRecord]) -> std::collections::BTreeSet<String> {
343        records
344            .iter()
345            .map(|r| r.path.to_string_lossy().into_owned())
346            .collect()
347    }
348
349    fn path_set(items: &[&str]) -> std::collections::BTreeSet<String> {
350        items.iter().map(|s| s.to_string()).collect()
351    }
352
353    // ── Builder state ────────────────────────────────────────────────────────
354
355    #[test]
356    fn builder_accumulates_predicates() {
357        let q = Query::new()
358            .with_type("contact")
359            .with_layer(Layer::Records)
360            .with_where("company", "acme")
361            .with_where("status", "active");
362
363        assert_eq!(q.type_.as_deref(), Some("contact"));
364        assert_eq!(q.layer, Some(Layer::Records));
365        assert_eq!(
366            q.wheres,
367            vec![
368                ("company".to_string(), "acme".to_string()),
369                ("status".to_string(), "active".to_string()),
370            ],
371            "each with_where appends a distinct clause"
372        );
373    }
374
375    #[test]
376    fn with_type_and_with_layer_replace_rather_than_stack() {
377        let q = Query::new()
378            .with_type("contact")
379            .with_type("company")
380            .with_layer(Layer::Sources)
381            .with_layer(Layer::Records);
382        assert_eq!(q.type_.as_deref(), Some("company"));
383        assert_eq!(q.layer, Some(Layer::Records));
384    }
385
386    #[test]
387    fn repeated_with_where_same_key_keeps_both_clauses() {
388        // Two clauses on the same key must both be retained (range-style AND),
389        // not collapsed to the last one.
390        let q = Query::new()
391            .with_where("updated", "2026-01-01T00:00:00+00:00")
392            .with_where("updated", "2026-02-01T00:00:00+00:00");
393        assert_eq!(q.wheres.len(), 2);
394    }
395
396    // ── execute: real sidecars on disk ───────────────────────────────────────
397
398    #[test]
399    fn execute_with_type_returns_only_that_types_folder() {
400        let contacts = [
401            rec("records/contacts/sarah.md", "contact", &[]),
402            rec("records/contacts/mara.md", "contact", &[]),
403        ];
404        let companies = [rec("records/companies/acme.md", "company", &[])];
405        let (_dir, store) = store_with_sidecars(&[
406            ("records/contacts", &contacts),
407            ("records/companies", &companies),
408        ]);
409
410        let got = Query::new().with_type("contact").execute(&store).unwrap();
411
412        assert_eq!(
413            paths(&got),
414            path_set(&["records/contacts/sarah.md", "records/contacts/mara.md"]),
415            "a type query reads its own type-folder sidecar and excludes other types"
416        );
417    }
418
419    #[test]
420    fn execute_type_plus_where_intersects_on_a_custom_field() {
421        let contacts = [
422            rec(
423                "records/contacts/sarah.md",
424                "contact",
425                &[("company", Value::String("acme".into()))],
426            ),
427            rec(
428                "records/contacts/mara.md",
429                "contact",
430                &[("company", Value::String("globex".into()))],
431            ),
432            rec("records/contacts/no-company.md", "contact", &[]),
433        ];
434        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
435
436        let got = Query::new()
437            .with_type("contact")
438            .with_where("company", "acme")
439            .execute(&store)
440            .unwrap();
441
442        assert_eq!(
443            paths(&got),
444            path_set(&["records/contacts/sarah.md"]),
445            "the where clause narrows the type's records to the matching field; \
446             a record missing the key does not match"
447        );
448    }
449
450    #[test]
451    fn execute_multiple_where_clauses_and_together() {
452        let contacts = [
453            rec(
454                "records/contacts/a.md",
455                "contact",
456                &[
457                    ("company", Value::String("acme".into())),
458                    ("status", Value::String("active".into())),
459                ],
460            ),
461            rec(
462                "records/contacts/b.md",
463                "contact",
464                &[
465                    ("company", Value::String("acme".into())),
466                    ("status", Value::String("churned".into())),
467                ],
468            ),
469            rec(
470                "records/contacts/c.md",
471                "contact",
472                &[
473                    ("company", Value::String("globex".into())),
474                    ("status", Value::String("active".into())),
475                ],
476            ),
477        ];
478        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
479
480        let got = Query::new()
481            .with_type("contact")
482            .with_where("company", "acme")
483            .with_where("status", "active")
484            .execute(&store)
485            .unwrap();
486
487        // Only `a` satisfies BOTH clauses. If the clauses were OR'd, `b` and `c`
488        // would leak in.
489        assert_eq!(paths(&got), path_set(&["records/contacts/a.md"]));
490    }
491
492    #[test]
493    fn execute_where_without_type_reads_across_sidecars() {
494        // `find_by_where` scans every sidecar; the same `domain` value lives in
495        // both a contact and a company record, and both come back.
496        let contacts = [rec(
497            "records/contacts/sarah.md",
498            "contact",
499            &[("domain", Value::String("acme.com".into()))],
500        )];
501        let companies = [
502            rec(
503                "records/companies/acme.md",
504                "company",
505                &[("domain", Value::String("acme.com".into()))],
506            ),
507            rec(
508                "records/companies/globex.md",
509                "company",
510                &[("domain", Value::String("globex.com".into()))],
511            ),
512        ];
513        let (_dir, store) = store_with_sidecars(&[
514            ("records/contacts", &contacts),
515            ("records/companies", &companies),
516        ]);
517
518        let got = Query::new()
519            .with_where("domain", "acme.com")
520            .execute(&store)
521            .unwrap();
522
523        assert_eq!(
524            paths(&got),
525            path_set(&["records/contacts/sarah.md", "records/companies/acme.md"]),
526            "a where-only query matches the field across every type-folder sidecar"
527        );
528    }
529
530    #[test]
531    fn execute_with_layer_scopes_by_path() {
532        // Same custom field value present in two layers; the layer scope must
533        // keep only the records under the named layer folder.
534        let source_recs = [rec(
535            "sources/notes/n1.md",
536            "note",
537            &[("topic", Value::String("billing".into()))],
538        )];
539        let record_recs = [rec(
540            "records/notes/n2.md",
541            "note",
542            &[("topic", Value::String("billing".into()))],
543        )];
544        let (_dir, store) = store_with_sidecars(&[
545            ("sources/notes", &source_recs),
546            ("records/notes", &record_recs),
547        ]);
548
549        // Without a layer scope, both layers' records match.
550        let unscoped = Query::new()
551            .with_where("topic", "billing")
552            .execute(&store)
553            .unwrap();
554        assert_eq!(
555            paths(&unscoped),
556            path_set(&["sources/notes/n1.md", "records/notes/n2.md"]),
557        );
558
559        // Scoped to Sources, only the sources-layer record survives.
560        let scoped = Query::new()
561            .with_where("topic", "billing")
562            .with_layer(Layer::Sources)
563            .execute(&store)
564            .unwrap();
565        assert_eq!(
566            paths(&scoped),
567            path_set(&["sources/notes/n1.md"]),
568            "with_layer(Sources) drops the records/-layer record"
569        );
570    }
571
572    #[test]
573    fn execute_where_only_with_layer_confines_sidecar_io_not_just_result() {
574        // The O(entities-in-layer) contract for a `--where`-only query (no
575        // `--type`): `--in <layer>` must scope the *sidecar read*, not merely
576        // filter the result after a whole-store read. Proven structurally — a
577        // corrupt sidecar in another layer would make the read error if it were
578        // touched, so a layer-scoped query that SUCCEEDS is proof the
579        // out-of-scope layer's I/O never happened.
580        let dir = TempDir::new().unwrap();
581        let root = dir.path();
582        fs::write(root.join("DB.md"), DB_MD).unwrap();
583
584        // In-scope layer: a valid sidecar with the matching record.
585        let records_dir = root.join("records/contacts");
586        fs::create_dir_all(&records_dir).unwrap();
587        let match_rec = rec(
588            "records/contacts/sarah.md",
589            "contact",
590            &[("domain", Value::String("acme.com".into()))],
591        );
592        fs::write(
593            records_dir.join("index.jsonl"),
594            format!("{}\n", jsonl_line(&match_rec)),
595        )
596        .unwrap();
597
598        // Out-of-scope layer: a CORRUPT sidecar. If a `--in records` query read
599        // it, `read_type_index` would error.
600        let sources_dir = root.join("sources/emails");
601        fs::create_dir_all(&sources_dir).unwrap();
602        fs::write(sources_dir.join("index.jsonl"), "{ not valid json }\n").unwrap();
603
604        let store = Store::open(root).unwrap();
605
606        // Scoped to records: succeeds and returns only the records-layer match,
607        // because the corrupt sources sidecar was never walked.
608        let scoped = Query::new()
609            .with_where("domain", "acme.com")
610            .with_layer(Layer::Records)
611            .execute(&store)
612            .expect("a records-scoped where query must not read the sources sidecar");
613        assert_eq!(paths(&scoped), path_set(&["records/contacts/sarah.md"]));
614
615        // Unscoped: the same query DOES walk every layer and trips over the
616        // corrupt sidecar — proving the corrupt file is real and that only the
617        // layer scope spared the scoped read from reading it.
618        let unscoped = Query::new()
619            .with_where("domain", "acme.com")
620            .execute(&store);
621        assert!(
622            unscoped.is_err(),
623            "an unscoped where query reads every sidecar, including the corrupt one"
624        );
625    }
626
627    #[test]
628    fn execute_full_composition_type_layer_where() {
629        let contacts = [
630            rec(
631                "records/contacts/match.md",
632                "contact",
633                &[("city", Value::String("denver".into()))],
634            ),
635            rec(
636                "records/contacts/wrong-city.md",
637                "contact",
638                &[("city", Value::String("austin".into()))],
639            ),
640        ];
641        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
642
643        let got = Query::new()
644            .with_type("contact")
645            .with_layer(Layer::Records)
646            .with_where("city", "denver")
647            .execute(&store)
648            .unwrap();
649        assert_eq!(paths(&got), path_set(&["records/contacts/match.md"]));
650
651        // The same query scoped to the wrong layer yields nothing, proving the
652        // layer predicate is live in the composed path.
653        let wrong_layer = Query::new()
654            .with_type("contact")
655            .with_layer(Layer::Sources)
656            .with_where("city", "denver")
657            .execute(&store)
658            .unwrap();
659        assert!(wrong_layer.is_empty());
660    }
661
662    #[test]
663    fn execute_bare_query_selects_no_sidecar() {
664        // A fully bare query (no type, no where, no layer) constrains nothing
665        // and has no selective candidate set, so it returns empty WITHOUT
666        // resolving to every record in the store.
667        let contacts = [rec("records/contacts/sarah.md", "contact", &[])];
668        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
669
670        let got = Query::new().execute(&store).unwrap();
671        assert!(
672            got.is_empty(),
673            "an unconstrained query resolves to empty, not to every record"
674        );
675    }
676
677    #[test]
678    fn execute_layer_only_enumerates_that_layer() {
679        // Regression (finding #47): a layer-only query (`--in <layer>` with no
680        // type/where) must enumerate that layer's records, not silently return
681        // []. Records live in two layers; the scope keeps only the named one.
682        let contacts = [rec("records/contacts/sarah.md", "contact", &[])];
683        let emails = [rec("sources/emails/e.md", "email", &[])];
684        let (_dir, store) =
685            store_with_sidecars(&[("records/contacts", &contacts), ("sources/emails", &emails)]);
686
687        let records = Query::new()
688            .with_layer(Layer::Records)
689            .execute(&store)
690            .unwrap();
691        assert_eq!(
692            paths(&records),
693            path_set(&["records/contacts/sarah.md"]),
694            "a layer-only query enumerates that layer, excluding other layers"
695        );
696
697        let sources = Query::new()
698            .with_layer(Layer::Sources)
699            .execute(&store)
700            .unwrap();
701        assert_eq!(
702            paths(&sources),
703            path_set(&["sources/emails/e.md"]),
704            "the sources-layer scope returns the sources records"
705        );
706    }
707
708    #[test]
709    fn execute_type_finds_records_filed_outside_canonical_layer() {
710        // Regression (finding #42): the folder layout is convention, not
711        // enforcement (SPEC). A `contact` filed under sources/ and a custom
712        // `screenshot` that only ever lives under sources/ must both be found
713        // by `--type`, which filters on the frontmatter type — not the type's
714        // canonical layer.
715        let source_contacts = [rec("sources/foo/jane.md", "contact", &[])];
716        let record_contacts = [rec("records/contacts/sarah.md", "contact", &[])];
717        let screenshots = [rec("sources/screenshots/shot1.md", "screenshot", &[])];
718        let (_dir, store) = store_with_sidecars(&[
719            ("sources/foo", &source_contacts),
720            ("records/contacts", &record_contacts),
721            ("sources/screenshots", &screenshots),
722        ]);
723
724        // `--type contact` returns BOTH the canonical and the non-canonical-
725        // layer record (jane under sources/, sarah under records/).
726        let contacts = Query::new().with_type("contact").execute(&store).unwrap();
727        assert_eq!(
728            paths(&contacts),
729            path_set(&["records/contacts/sarah.md", "sources/foo/jane.md"]),
730            "a type query spans every layer the type is filed under"
731        );
732
733        // A custom type that only ever lives under sources/ is still found.
734        let shots = Query::new()
735            .with_type("screenshot")
736            .execute(&store)
737            .unwrap();
738        assert_eq!(
739            paths(&shots),
740            path_set(&["sources/screenshots/shot1.md"]),
741            "a type filed entirely under sources/ is visible to --type"
742        );
743
744        // `--type contact --in sources` resolves to the sources-layer contact,
745        // not [] (the previously-dead --type/--in combination).
746        let in_sources = Query::new()
747            .with_type("contact")
748            .with_layer(Layer::Sources)
749            .execute(&store)
750            .unwrap();
751        assert_eq!(
752            paths(&in_sources),
753            path_set(&["sources/foo/jane.md"]),
754            "--type X --in <layer> returns the records of that type under the layer"
755        );
756
757        // And `--type contact --in records` keeps only the records-layer one.
758        let in_records = Query::new()
759            .with_type("contact")
760            .with_layer(Layer::Records)
761            .execute(&store)
762            .unwrap();
763        assert_eq!(
764            paths(&in_records),
765            path_set(&["records/contacts/sarah.md"]),
766            "the layer scope confines a type query to the named layer"
767        );
768    }
769
770    #[test]
771    fn execute_tag_membership_via_where() {
772        let mut urgent = rec("records/tasks/t1.md", "task", &[]);
773        urgent.tags = vec!["urgent".into(), "ops".into()];
774        let mut calm = rec("records/tasks/t2.md", "task", &[]);
775        calm.tags = vec!["ops".into()];
776        let recs = [urgent, calm];
777        let (_dir, store) = store_with_sidecars(&[("records/tasks", &recs)]);
778
779        let got = Query::new()
780            .with_type("task")
781            .with_where("tags", "urgent")
782            .execute(&store)
783            .unwrap();
784        assert_eq!(
785            paths(&got),
786            path_set(&["records/tasks/t1.md"]),
787            "tags match on membership: only the record carrying the tag matches"
788        );
789    }
790
791    #[test]
792    fn execute_matches_numeric_and_bool_fields_from_string_predicate() {
793        let recs = [
794            rec(
795                "records/invoices/paid.md",
796                "invoice",
797                &[
798                    ("amount", Value::Number(42.into())),
799                    ("paid", Value::Bool(true)),
800                ],
801            ),
802            rec(
803                "records/invoices/unpaid.md",
804                "invoice",
805                &[
806                    ("amount", Value::Number(99.into())),
807                    ("paid", Value::Bool(false)),
808                ],
809            ),
810        ];
811        let (_dir, store) = store_with_sidecars(&[("records/invoices", &recs)]);
812
813        let by_amount = Query::new()
814            .with_type("invoice")
815            .with_where("amount", "42")
816            .execute(&store)
817            .unwrap();
818        assert_eq!(
819            paths(&by_amount),
820            path_set(&["records/invoices/paid.md"]),
821            "a JSON number matches the string form of the predicate"
822        );
823
824        let by_paid = Query::new()
825            .with_type("invoice")
826            .with_where("paid", "true")
827            .execute(&store)
828            .unwrap();
829        assert_eq!(
830            paths(&by_paid),
831            path_set(&["records/invoices/paid.md"]),
832            "a JSON bool matches \"true\"/\"false\""
833        );
834    }
835
836    #[test]
837    fn execute_honors_last_write_wins_in_sidecar() {
838        // Two JSONL lines for the same path: the later supersedes the earlier
839        // (read_type_index applies last-write-wins). A query on the superseding
840        // field must match, and one on the superseded field must not.
841        let dir = TempDir::new().unwrap();
842        let root = dir.path();
843        fs::write(root.join("DB.md"), DB_MD).unwrap();
844        let folder = root.join("records/contacts");
845        fs::create_dir_all(&folder).unwrap();
846
847        let old = rec(
848            "records/contacts/sarah.md",
849            "contact",
850            &[("status", Value::String("lead".into()))],
851        );
852        let new = rec(
853            "records/contacts/sarah.md",
854            "contact",
855            &[("status", Value::String("customer".into()))],
856        );
857        fs::write(
858            folder.join("index.jsonl"),
859            format!("{}\n{}\n", jsonl_line(&old), jsonl_line(&new)),
860        )
861        .unwrap();
862        let store = Store::open(root).unwrap();
863
864        let superseding = Query::new()
865            .with_type("contact")
866            .with_where("status", "customer")
867            .execute(&store)
868            .unwrap();
869        assert_eq!(superseding.len(), 1, "the superseding line's value matches");
870
871        let superseded = Query::new()
872            .with_type("contact")
873            .with_where("status", "lead")
874            .execute(&store)
875            .unwrap();
876        assert!(
877            superseded.is_empty(),
878            "the superseded line's value no longer matches after last-write-wins"
879        );
880    }
881
882    #[test]
883    fn execute_returns_full_records_not_just_paths() {
884        // The contract returns full IndexRecords straight from the sidecar:
885        // summary, tags, links, and fields must survive the round-trip.
886        let mut r = rec(
887            "records/contacts/sarah.md",
888            "contact",
889            &[("company", Value::String("acme".into()))],
890        );
891        r.summary = "Renewal champion".into();
892        r.tags = vec!["vip".into()];
893        r.links = vec!["records/profiles/sarah-chen.md".into()];
894        let recs = [r];
895        let (_dir, store) = store_with_sidecars(&[("records/contacts", &recs)]);
896
897        let got = Query::new().with_type("contact").execute(&store).unwrap();
898        assert_eq!(got.len(), 1);
899        let only = &got[0];
900        assert_eq!(only.summary, "Renewal champion");
901        assert_eq!(only.tags, vec!["vip".to_string()]);
902        assert_eq!(
903            only.links,
904            vec!["records/profiles/sarah-chen.md".to_string()]
905        );
906        assert_eq!(
907            only.fields.get("company"),
908            Some(&Value::String("acme".into())),
909            "type-specific fields come back verbatim for on-demand use"
910        );
911    }
912
913    // ── Pure matcher logic (no store I/O) ────────────────────────────────────
914
915    #[test]
916    fn record_matches_where_on_typed_columns() {
917        let mut r = rec("records/contacts/x.md", "contact", &[]);
918        r.summary = "hello".into();
919
920        assert!(record_matches_where(&r, "type", "contact"));
921        assert!(!record_matches_where(&r, "type", "company"));
922        assert!(record_matches_where(&r, "summary", "hello"));
923        assert!(!record_matches_where(&r, "summary", "goodbye"));
924        assert!(record_matches_where(&r, "path", "records/contacts/x.md"));
925        assert!(!record_matches_where(&r, "path", "records/contacts/y.md"));
926    }
927
928    #[test]
929    fn record_matches_where_on_timestamps_uses_rfc3339() {
930        let mut r = rec("records/meetings/m.md", "meeting", &[]);
931        let ts = chrono::DateTime::parse_from_rfc3339("2026-05-29T12:00:00+00:00").unwrap();
932        r.created = Some(ts);
933
934        assert!(record_matches_where(
935            &r,
936            "created",
937            "2026-05-29T12:00:00+00:00"
938        ));
939        assert!(!record_matches_where(
940            &r,
941            "created",
942            "2026-05-29T13:00:00+00:00"
943        ));
944        // `updated` is unset → never matches, even the same instant.
945        assert!(!record_matches_where(
946            &r,
947            "updated",
948            "2026-05-29T12:00:00+00:00"
949        ));
950    }
951
952    #[test]
953    fn record_matches_where_timestamp_z_and_offset_spellings_are_equal() {
954        // Regression: the in-memory filter compared `to_rfc3339()` (always the
955        // `+00:00` form) to the raw predicate string, so a `Z`-spelled query of
956        // the identical instant silently failed — and disagreed with the
957        // `Store::find_by_where_in` sidecar pre-filter (instant-based),
958        // dropping real matches. Both spellings must compare equal now.
959        let mut stored_z = rec("records/meetings/m.md", "meeting", &[]);
960        stored_z.created =
961            Some(chrono::DateTime::parse_from_rfc3339("2026-05-29T12:00:00Z").unwrap());
962        assert!(record_matches_where(
963            &stored_z,
964            "created",
965            "2026-05-29T12:00:00Z"
966        ));
967        assert!(record_matches_where(
968            &stored_z,
969            "created",
970            "2026-05-29T12:00:00+00:00"
971        ));
972
973        // Stored as `+00:00`, queried as `Z` — this is the spelling pair that
974        // failed before the fix.
975        let mut stored_offset = rec("records/meetings/n.md", "meeting", &[]);
976        stored_offset.created =
977            Some(chrono::DateTime::parse_from_rfc3339("2026-05-29T12:00:00+00:00").unwrap());
978        assert!(record_matches_where(
979            &stored_offset,
980            "created",
981            "2026-05-29T12:00:00Z"
982        ));
983
984        // A different instant still does not match; an unparseable value is false.
985        assert!(!record_matches_where(
986            &stored_z,
987            "created",
988            "2026-05-29T13:00:00Z"
989        ));
990        assert!(!record_matches_where(
991            &stored_z,
992            "created",
993            "not-a-timestamp"
994        ));
995    }
996
997    #[test]
998    fn record_matches_where_absent_field_is_false() {
999        let r = rec("records/contacts/x.md", "contact", &[]);
1000        assert!(
1001            !record_matches_where(&r, "nonexistent", "anything"),
1002            "an absent frontmatter key never matches"
1003        );
1004    }
1005
1006    #[test]
1007    fn json_value_matches_covers_scalars_and_arrays() {
1008        assert!(json_value_matches(&Value::String("acme".into()), "acme"));
1009        assert!(!json_value_matches(&Value::String("acme".into()), "globex"));
1010
1011        assert!(json_value_matches(&Value::Number(42.into()), "42"));
1012        assert!(!json_value_matches(&Value::Number(42.into()), "43"));
1013
1014        assert!(json_value_matches(&Value::Bool(true), "true"));
1015        assert!(json_value_matches(&Value::Bool(false), "false"));
1016        assert!(!json_value_matches(&Value::Bool(true), "false"));
1017
1018        let arr = Value::Array(vec![Value::String("a".into()), Value::String("b".into())]);
1019        assert!(json_value_matches(&arr, "b"), "array matches on membership");
1020        assert!(!json_value_matches(&arr, "c"));
1021    }
1022
1023    #[test]
1024    fn json_value_matches_null_and_object_never_match() {
1025        assert!(!json_value_matches(&Value::Null, ""));
1026        assert!(!json_value_matches(&Value::Null, "null"));
1027        let obj = serde_json::json!({"k": "v"});
1028        assert!(!json_value_matches(&obj, "v"));
1029    }
1030
1031    #[test]
1032    fn record_in_layer_keys_off_first_path_component() {
1033        let s = rec("sources/emails/e.md", "email", &[]);
1034        let r = rec("records/contacts/c.md", "contact", &[]);
1035        // A conclusion record (the former wiki-page) lives in the records layer.
1036        let c = rec("records/profiles/p.md", "profile", &[]);
1037
1038        assert!(record_in_layer(&s, Layer::Sources));
1039        assert!(!record_in_layer(&s, Layer::Records));
1040        assert!(record_in_layer(&r, Layer::Records));
1041        assert!(!record_in_layer(&r, Layer::Sources));
1042        assert!(record_in_layer(&c, Layer::Records));
1043        assert!(!record_in_layer(&c, Layer::Sources));
1044    }
1045
1046    #[test]
1047    fn filter_candidates_skips_already_applied_where_clause() {
1048        // Simulate the find_by_where path: the first clause is "already applied"
1049        // by the sidecar reader, so filter_candidates must skip it and only
1050        // enforce the remaining clause. A record satisfying only the (skipped)
1051        // first clause but NOT the second must still be dropped.
1052        let q = Query::new()
1053            .with_where("company", "acme")
1054            .with_where("status", "active");
1055
1056        let keep = rec(
1057            "records/contacts/keep.md",
1058            "contact",
1059            &[
1060                ("company", Value::String("acme".into())),
1061                ("status", Value::String("active".into())),
1062            ],
1063        );
1064        let drop = rec(
1065            "records/contacts/drop.md",
1066            "contact",
1067            &[
1068                ("company", Value::String("acme".into())),
1069                ("status", Value::String("churned".into())),
1070            ],
1071        );
1072
1073        let out = q.filter_candidates(vec![keep, drop], false, 1);
1074        assert_eq!(
1075            paths(&out),
1076            path_set(&["records/contacts/keep.md"]),
1077            "the second clause is enforced even when the first is pre-applied"
1078        );
1079    }
1080
1081    #[test]
1082    fn filter_candidates_enforces_type_when_not_preapplied() {
1083        // When the candidate set did NOT come from find_by_type (type_applied =
1084        // false), filter_candidates must still drop records of the wrong type.
1085        let q = Query::new().with_type("contact");
1086        let contact = rec("records/contacts/c.md", "contact", &[]);
1087        let company = rec("records/companies/co.md", "company", &[]);
1088
1089        let out = q.filter_candidates(vec![contact, company], false, 0);
1090        assert_eq!(paths(&out), path_set(&["records/contacts/c.md"]));
1091    }
1092
1093    /// Local guard: the test fixtures write sidecars under the same canonical
1094    /// folders the store reader derives, so a `with_type` query finds them.
1095    /// If this drifts, the integration tests above silently weaken — assert the
1096    /// convention explicitly.
1097    #[test]
1098    fn fixture_canonical_folders_match_store_expectations() {
1099        let contacts = [rec("records/contacts/x.md", "contact", &[])];
1100        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
1101        // `contact` records live at records/contacts/ — the same folder the
1102        // fixture wrote — so the type read is non-empty.
1103        let got = store.find_by_type("contact").unwrap();
1104        assert_eq!(got.len(), 1, "fixture folder == store's canonical folder");
1105    }
1106}