Skip to main content

dbmd_core/
query.rs

1//! `query` — Dataview-style filters, **sidecar-backed**.
2//!
3//! Resolves against the type-folder `index.jsonl` sidecar(s) via
4//! [`Store::find_by_type`] / [`Store::find_by_where`] /
5//! [`Store::read_type_index`] — one sequential, complete read per type-folder,
6//! cold-cache-proof — **never** a walk-and-parse. Returns full
7//! [`IndexRecord`]s straight from the sidecar (path + fields + summary +
8//! links); the caller opens the underlying file only if it needs the body.
9//!
10//! Backs `dbmd search --type/--where`, `dbmd fm query`, `dbmd index query`, and
11//! `dbmd graph backlinks --type/--in`.
12
13use chrono::{DateTime, FixedOffset};
14use serde_json::Value;
15
16use crate::index::IndexRecord;
17use crate::store::{Layer, Store, StoreError};
18
19/// A composable, sidecar-backed filter over a store's records.
20///
21/// Build with [`Query::new`] and the `with_*` methods, then [`Query::execute`].
22/// Multiple [`Query::with_where`] clauses AND together (intersection over the
23/// sidecar records).
24#[derive(Debug, Clone, Default)]
25pub struct Query {
26    /// `type` predicate (`with_type`).
27    type_: Option<String>,
28    /// Layer scope (`with_layer` / `--in <layer>`).
29    layer: Option<Layer>,
30    /// `key=value` frontmatter predicates, ANDed.
31    wheres: Vec<(String, String)>,
32}
33
34impl Query {
35    /// Start a new, empty query (matches everything until narrowed).
36    pub fn new() -> Self {
37        Self::default()
38    }
39
40    /// Restrict to a single `type` (frontmatter `type` predicate).
41    ///
42    /// Setting it again replaces the previous value — a query has at most one
43    /// `type` (a record carries exactly one `type`, so two types would never
44    /// intersect).
45    pub fn with_type(mut self, type_: &str) -> Self {
46        self.type_ = Some(type_.to_string());
47        self
48    }
49
50    /// Restrict to one layer (`Sources` / `Records`) — scopes which
51    /// sidecars' records survive. Setting it again replaces the previous layer.
52    pub fn with_layer(mut self, layer: Layer) -> Self {
53        self.layer = Some(layer);
54        self
55    }
56
57    /// Add a `key=value` frontmatter predicate; chains as AND with any others
58    /// (intersection over the sidecar records). Repeating the same `key` adds a
59    /// second clause — both must hold — rather than replacing the first.
60    pub fn with_where(mut self, key: &str, value: &str) -> Self {
61        self.wheres.push((key.to_string(), value.to_string()));
62        self
63    }
64
65    /// Resolve the query against the relevant type-folder `index.jsonl`
66    /// sidecar(s) and return the matching [`IndexRecord`]s — complete, one
67    /// sequential read per type-folder, no whole-store walk.
68    ///
69    /// The candidate set comes from the most selective frozen sidecar reader,
70    /// always **layer-scoped** when [`with_layer`] is set, so an `--in <layer>`
71    /// scope confines the sidecar walk to that layer's subtree
72    /// (O(entities-in-layer), the interactive-loop contract):
73    ///
74    /// - a `type` predicate reads the sidecars across the named layer (or the
75    ///   whole store when unscoped) and filters by the frontmatter `type`. The
76    ///   folder layout is convention, not enforcement (SPEC), so a record whose
77    ///   `type` is filed outside that type's canonical layer — a `contact` in
78    ///   `sources/`, a custom `screenshot` that only ever lives in `sources/` —
79    ///   is still found, and `--type X --in <other-layer>` returns exactly the
80    ///   records of that type filed under the other layer rather than always
81    ///   being empty;
82    /// - otherwise the first `where` clause picks the sidecars and pre-filters,
83    ///   scoped to the layer when set;
84    /// - otherwise (a layer scope but no `type`/`where`) the layer's own
85    ///   sidecar records are the candidate set, so `--in <layer>` on its own
86    ///   enumerates that layer instead of silently returning empty.
87    ///
88    /// Every remaining predicate is then applied in memory over the returned
89    /// records — no extra sidecar reads, no walk.
90    ///
91    /// [`with_layer`]: Query::with_layer
92    ///
93    /// A fully bare query (no `type`, no `where`, no layer) constrains nothing
94    /// and has no selective candidate set, so it returns an empty result.
95    pub fn execute(&self, store: &Store) -> Result<Vec<IndexRecord>, StoreError> {
96        // Pick the candidate set from the cheapest frozen sidecar reader, and
97        // remember which predicates that reader has already satisfied so the
98        // in-memory pass doesn't re-test them.
99        let (candidates, type_done, where_done) = if self.type_.is_some() {
100            // A `type` predicate resolves over the named layer's sidecars (or
101            // the whole store when unscoped), filtering by the frontmatter
102            // `type` rather than guessing a single canonical type-folder. This
103            // keeps the result complete across every folder — and every layer —
104            // the type is filed under, so a record filed outside the type's
105            // canonical layer is still returned and `--type X --in <layer>`
106            // resolves correctly. The in-memory pass below applies the `type`
107            // (and layer, when scoped via `--in`) predicate.
108            (store.sidecar_records(self.layer)?, false, 0)
109        } else if let Some((key, value)) = self.wheres.first() {
110            // No type to scope on: let the first `where` clause pick the
111            // sidecars and pre-filter. `self.layer` (when set) confines the
112            // sidecar walk to that layer's subtree, so a `--where`-only query
113            // is O(entities-in-layer), not O(store records) — the in-memory
114            // layer filter below then becomes a no-op for this path. The
115            // remaining clauses AND in memory.
116            (store.find_by_where_in(key, value, self.layer)?, false, 1)
117        } else if let Some(layer) = self.layer {
118            // Layer-only (`--in <layer>` with no type/where): enumerate that
119            // layer's sidecar records. The in-memory layer filter below is a
120            // no-op for this path (the read is already layer-scoped).
121            (store.sidecar_records(Some(layer))?, false, 0)
122        } else {
123            // Nothing selects a sidecar: no walk-free candidate set exists.
124            return Ok(Vec::new());
125        };
126
127        Ok(self.filter_candidates(candidates, type_done, where_done))
128    }
129
130    /// Apply the in-memory predicate pass over a candidate set returned by a
131    /// sidecar reader: the `type` predicate (unless `type_already_applied`,
132    /// when a reader has already guaranteed it), the [`with_layer`] scope, and
133    /// every remaining `where` clause (skipping the first
134    /// `wheres_already_applied`, which [`Store::find_by_where_in`] pre-filtered).
135    /// All surviving predicates AND together.
136    ///
137    /// Split out from [`Query::execute`] so the composition is exercisable over
138    /// hand-built [`IndexRecord`]s independent of the sidecar I/O.
139    ///
140    /// [`with_layer`]: Query::with_layer
141    fn filter_candidates(
142        &self,
143        candidates: Vec<IndexRecord>,
144        type_already_applied: bool,
145        wheres_already_applied: usize,
146    ) -> Vec<IndexRecord> {
147        candidates
148            .into_iter()
149            .filter(|record| {
150                if !type_already_applied {
151                    if let Some(type_) = &self.type_ {
152                        if record.type_ != *type_ {
153                            return false;
154                        }
155                    }
156                }
157                if let Some(layer) = self.layer {
158                    if !record_in_layer(record, layer) {
159                        return false;
160                    }
161                }
162                self.wheres
163                    .iter()
164                    .skip(wheres_already_applied)
165                    .all(|(key, value)| record_matches_where(record, key, value))
166            })
167            .collect()
168    }
169}
170
171/// True if `record`'s store-relative `path` lives under `layer`'s top-level
172/// folder (`sources/` / `records/`). The sidecar readers can return
173/// records from any layer (a `type` folder name is not unique across layers),
174/// so a `with_layer` scope is enforced here on the record's path.
175fn record_in_layer(record: &IndexRecord, layer: Layer) -> bool {
176    record
177        .path
178        .components()
179        .next()
180        .and_then(|c| c.as_os_str().to_str())
181        == Some(layer_dir_name(layer))
182}
183
184/// The top-level folder name for a [`Layer`] (`"sources"` / `"records"` /
185/// `"wiki"`). Kept local so the layer-scope filter is self-contained and does
186/// not couple `query` to the store-walk module's dir-name helpers.
187fn layer_dir_name(layer: Layer) -> &'static str {
188    match layer {
189        Layer::Sources => "sources",
190        Layer::Records => "records",
191    }
192}
193
194/// True if `record` satisfies a single `key=value` frontmatter predicate.
195///
196/// The universal-contract keys map to their typed [`IndexRecord`] columns
197/// (`type`, `summary`, `created`, `updated`, plus the list-valued `tags` /
198/// `links` which match when `value` is one of the members); every other key is
199/// looked up in [`IndexRecord::fields`] and compared with
200/// [`json_value_matches`]. An absent key never matches.
201fn record_matches_where(record: &IndexRecord, key: &str, value: &str) -> bool {
202    match key {
203        "type" => record.type_ == value,
204        "summary" => record.summary == value,
205        "path" => record.path.to_str() == Some(value),
206        // List-valued columns match on membership: `tags=urgent` is true when
207        // `urgent` is one of the file's tags.
208        "tags" => record.tags.iter().any(|t| t == value),
209        "links" => record.links.iter().any(|l| l == value),
210        // Timestamps compare as instants (both sides parsed as RFC3339) so a
211        // `Z`-form query matches a `+00:00`-form stored value and vice versa.
212        // A plain string compare of `to_rfc3339()` would disagree with the
213        // `Store::find_by_where_in` sidecar pre-filter — which this in-memory
214        // pass re-runs over — and silently drop real matches.
215        "created" => timestamp_value_matches(record.created, value),
216        "updated" => timestamp_value_matches(record.updated, value),
217        _ => record
218            .fields
219            .get(key)
220            .is_some_and(|v| json_value_matches(v, value)),
221    }
222}
223
224/// Compare a sidecar [`Value`] against the string `value` from a `key=value`
225/// predicate. The CLI surface is all strings, so matching is defined against
226/// the value's natural string form:
227///
228/// - a string matches when equal;
229/// - a number matches when its canonical render equals `value` (so `42` matches
230///   `"42"`, and `12.5` matches `"12.5"`);
231/// - a bool matches `"true"` / `"false"`;
232/// - an array matches when **any** element matches (so a list-valued custom
233///   field behaves like `tags` — membership, not whole-list equality);
234/// - `null` never matches (a present-but-null field is treated as no value).
235fn json_value_matches(value: &Value, target: &str) -> bool {
236    match value {
237        Value::String(s) => s == target,
238        Value::Number(n) => n.to_string() == target,
239        Value::Bool(b) => b.to_string() == target,
240        Value::Array(items) => items.iter().any(|item| json_value_matches(item, target)),
241        Value::Null => false,
242        // Objects have no scalar form a `key=value` predicate can match.
243        Value::Object(_) => false,
244    }
245}
246
247/// Match a stored instant against a `key=value` predicate by parsing `value` as
248/// RFC3339 and comparing instants. A plain string compare of `to_rfc3339()`
249/// (which always emits the numeric `+00:00` offset, never `Z`) would reject a
250/// `…Z` query against the identical moment, and disagree with the sidecar
251/// pre-filter [`Store::find_by_where_in`], silently dropping real matches.
252fn timestamp_value_matches(stored: Option<DateTime<FixedOffset>>, value: &str) -> bool {
253    match (stored, DateTime::parse_from_rfc3339(value)) {
254        (Some(stored), Ok(queried)) => stored == queried,
255        _ => false,
256    }
257}
258
259#[cfg(test)]
260mod tests {
261    use super::*;
262    use crate::store::Store;
263    use std::fs;
264    use std::path::PathBuf;
265    use tempfile::TempDir;
266
267    // ── Fixtures ─────────────────────────────────────────────────────────────
268
269    /// Build an [`IndexRecord`] with the given store-relative path, type, and
270    /// extra (`fields`) frontmatter, leaving the timestamp/list columns empty.
271    /// Tests that need `tags`/`links`/`created` set them on the returned value.
272    fn rec(path: &str, type_: &str, fields: &[(&str, Value)]) -> IndexRecord {
273        IndexRecord {
274            path: PathBuf::from(path),
275            type_: type_.to_string(),
276            summary: format!("summary of {path}"),
277            tags: Vec::new(),
278            links: Vec::new(),
279            created: None,
280            updated: None,
281            fields: fields
282                .iter()
283                .map(|(k, v)| (k.to_string(), v.clone()))
284                .collect(),
285        }
286    }
287
288    /// Serialize one record to a single JSONL line (what a real sidecar holds).
289    fn jsonl_line(record: &IndexRecord) -> String {
290        serde_json::to_string(record).expect("serialize IndexRecord")
291    }
292
293    /// A minimal but valid `DB.md` marker (a `---` frontmatter block, which
294    /// `parse_db_md` requires; the body is empty so the config is the default).
295    const DB_MD: &str = "---\ntype: db-md\n---\n\n# Test store\n";
296
297    /// Write a temp store: a `DB.md` marker plus an `index.jsonl` sidecar at
298    /// each `(store-relative folder, records)` entry. Returns the temp dir
299    /// (kept alive by the caller) and the opened [`Store`].
300    fn store_with_sidecars(sidecars: &[(&str, &[IndexRecord])]) -> (TempDir, Store) {
301        let dir = TempDir::new().expect("temp dir");
302        let root = dir.path();
303        fs::write(root.join("DB.md"), DB_MD).expect("write DB.md");
304
305        for (folder, records) in sidecars {
306            let folder_abs = root.join(folder);
307            fs::create_dir_all(&folder_abs).expect("create type folder");
308            let body: String = records
309                .iter()
310                .map(|r| format!("{}\n", jsonl_line(r)))
311                .collect();
312            fs::write(folder_abs.join("index.jsonl"), body).expect("write index.jsonl");
313        }
314
315        let store = Store::open(root).expect("open store");
316        (dir, store)
317    }
318
319    /// The set of store-relative path strings in a result set, for order-
320    /// independent assertions.
321    fn paths(records: &[IndexRecord]) -> std::collections::BTreeSet<String> {
322        records
323            .iter()
324            .map(|r| r.path.to_string_lossy().into_owned())
325            .collect()
326    }
327
328    fn path_set(items: &[&str]) -> std::collections::BTreeSet<String> {
329        items.iter().map(|s| s.to_string()).collect()
330    }
331
332    // ── Builder state ────────────────────────────────────────────────────────
333
334    #[test]
335    fn builder_accumulates_predicates() {
336        let q = Query::new()
337            .with_type("contact")
338            .with_layer(Layer::Records)
339            .with_where("company", "acme")
340            .with_where("status", "active");
341
342        assert_eq!(q.type_.as_deref(), Some("contact"));
343        assert_eq!(q.layer, Some(Layer::Records));
344        assert_eq!(
345            q.wheres,
346            vec![
347                ("company".to_string(), "acme".to_string()),
348                ("status".to_string(), "active".to_string()),
349            ],
350            "each with_where appends a distinct clause"
351        );
352    }
353
354    #[test]
355    fn with_type_and_with_layer_replace_rather_than_stack() {
356        let q = Query::new()
357            .with_type("contact")
358            .with_type("company")
359            .with_layer(Layer::Sources)
360            .with_layer(Layer::Records);
361        assert_eq!(q.type_.as_deref(), Some("company"));
362        assert_eq!(q.layer, Some(Layer::Records));
363    }
364
365    #[test]
366    fn repeated_with_where_same_key_keeps_both_clauses() {
367        // Two clauses on the same key must both be retained (range-style AND),
368        // not collapsed to the last one.
369        let q = Query::new()
370            .with_where("updated", "2026-01-01T00:00:00+00:00")
371            .with_where("updated", "2026-02-01T00:00:00+00:00");
372        assert_eq!(q.wheres.len(), 2);
373    }
374
375    // ── execute: real sidecars on disk ───────────────────────────────────────
376
377    #[test]
378    fn execute_with_type_returns_only_that_types_folder() {
379        let contacts = [
380            rec("records/contacts/sarah.md", "contact", &[]),
381            rec("records/contacts/mara.md", "contact", &[]),
382        ];
383        let companies = [rec("records/companies/acme.md", "company", &[])];
384        let (_dir, store) = store_with_sidecars(&[
385            ("records/contacts", &contacts),
386            ("records/companies", &companies),
387        ]);
388
389        let got = Query::new().with_type("contact").execute(&store).unwrap();
390
391        assert_eq!(
392            paths(&got),
393            path_set(&["records/contacts/sarah.md", "records/contacts/mara.md"]),
394            "a type query reads its own type-folder sidecar and excludes other types"
395        );
396    }
397
398    #[test]
399    fn execute_type_plus_where_intersects_on_a_custom_field() {
400        let contacts = [
401            rec(
402                "records/contacts/sarah.md",
403                "contact",
404                &[("company", Value::String("acme".into()))],
405            ),
406            rec(
407                "records/contacts/mara.md",
408                "contact",
409                &[("company", Value::String("globex".into()))],
410            ),
411            rec("records/contacts/no-company.md", "contact", &[]),
412        ];
413        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
414
415        let got = Query::new()
416            .with_type("contact")
417            .with_where("company", "acme")
418            .execute(&store)
419            .unwrap();
420
421        assert_eq!(
422            paths(&got),
423            path_set(&["records/contacts/sarah.md"]),
424            "the where clause narrows the type's records to the matching field; \
425             a record missing the key does not match"
426        );
427    }
428
429    #[test]
430    fn execute_multiple_where_clauses_and_together() {
431        let contacts = [
432            rec(
433                "records/contacts/a.md",
434                "contact",
435                &[
436                    ("company", Value::String("acme".into())),
437                    ("status", Value::String("active".into())),
438                ],
439            ),
440            rec(
441                "records/contacts/b.md",
442                "contact",
443                &[
444                    ("company", Value::String("acme".into())),
445                    ("status", Value::String("churned".into())),
446                ],
447            ),
448            rec(
449                "records/contacts/c.md",
450                "contact",
451                &[
452                    ("company", Value::String("globex".into())),
453                    ("status", Value::String("active".into())),
454                ],
455            ),
456        ];
457        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
458
459        let got = Query::new()
460            .with_type("contact")
461            .with_where("company", "acme")
462            .with_where("status", "active")
463            .execute(&store)
464            .unwrap();
465
466        // Only `a` satisfies BOTH clauses. If the clauses were OR'd, `b` and `c`
467        // would leak in.
468        assert_eq!(paths(&got), path_set(&["records/contacts/a.md"]));
469    }
470
471    #[test]
472    fn execute_where_without_type_reads_across_sidecars() {
473        // `find_by_where` scans every sidecar; the same `domain` value lives in
474        // both a contact and a company record, and both come back.
475        let contacts = [rec(
476            "records/contacts/sarah.md",
477            "contact",
478            &[("domain", Value::String("acme.com".into()))],
479        )];
480        let companies = [
481            rec(
482                "records/companies/acme.md",
483                "company",
484                &[("domain", Value::String("acme.com".into()))],
485            ),
486            rec(
487                "records/companies/globex.md",
488                "company",
489                &[("domain", Value::String("globex.com".into()))],
490            ),
491        ];
492        let (_dir, store) = store_with_sidecars(&[
493            ("records/contacts", &contacts),
494            ("records/companies", &companies),
495        ]);
496
497        let got = Query::new()
498            .with_where("domain", "acme.com")
499            .execute(&store)
500            .unwrap();
501
502        assert_eq!(
503            paths(&got),
504            path_set(&["records/contacts/sarah.md", "records/companies/acme.md"]),
505            "a where-only query matches the field across every type-folder sidecar"
506        );
507    }
508
509    #[test]
510    fn execute_with_layer_scopes_by_path() {
511        // Same custom field value present in two layers; the layer scope must
512        // keep only the records under the named layer folder.
513        let source_recs = [rec(
514            "sources/notes/n1.md",
515            "note",
516            &[("topic", Value::String("billing".into()))],
517        )];
518        let record_recs = [rec(
519            "records/notes/n2.md",
520            "note",
521            &[("topic", Value::String("billing".into()))],
522        )];
523        let (_dir, store) = store_with_sidecars(&[
524            ("sources/notes", &source_recs),
525            ("records/notes", &record_recs),
526        ]);
527
528        // Without a layer scope, both layers' records match.
529        let unscoped = Query::new()
530            .with_where("topic", "billing")
531            .execute(&store)
532            .unwrap();
533        assert_eq!(
534            paths(&unscoped),
535            path_set(&["sources/notes/n1.md", "records/notes/n2.md"]),
536        );
537
538        // Scoped to Sources, only the sources-layer record survives.
539        let scoped = Query::new()
540            .with_where("topic", "billing")
541            .with_layer(Layer::Sources)
542            .execute(&store)
543            .unwrap();
544        assert_eq!(
545            paths(&scoped),
546            path_set(&["sources/notes/n1.md"]),
547            "with_layer(Sources) drops the records/-layer record"
548        );
549    }
550
551    #[test]
552    fn execute_where_only_with_layer_confines_sidecar_io_not_just_result() {
553        // The O(entities-in-layer) contract for a `--where`-only query (no
554        // `--type`): `--in <layer>` must scope the *sidecar read*, not merely
555        // filter the result after a whole-store read. Proven structurally — a
556        // corrupt sidecar in another layer would make the read error if it were
557        // touched, so a layer-scoped query that SUCCEEDS is proof the
558        // out-of-scope layer's I/O never happened.
559        let dir = TempDir::new().unwrap();
560        let root = dir.path();
561        fs::write(root.join("DB.md"), DB_MD).unwrap();
562
563        // In-scope layer: a valid sidecar with the matching record.
564        let records_dir = root.join("records/contacts");
565        fs::create_dir_all(&records_dir).unwrap();
566        let match_rec = rec(
567            "records/contacts/sarah.md",
568            "contact",
569            &[("domain", Value::String("acme.com".into()))],
570        );
571        fs::write(
572            records_dir.join("index.jsonl"),
573            format!("{}\n", jsonl_line(&match_rec)),
574        )
575        .unwrap();
576
577        // Out-of-scope layer: a CORRUPT sidecar. If a `--in records` query read
578        // it, `read_type_index` would error.
579        let sources_dir = root.join("sources/emails");
580        fs::create_dir_all(&sources_dir).unwrap();
581        fs::write(sources_dir.join("index.jsonl"), "{ not valid json }\n").unwrap();
582
583        let store = Store::open(root).unwrap();
584
585        // Scoped to records: succeeds and returns only the records-layer match,
586        // because the corrupt sources sidecar was never walked.
587        let scoped = Query::new()
588            .with_where("domain", "acme.com")
589            .with_layer(Layer::Records)
590            .execute(&store)
591            .expect("a records-scoped where query must not read the sources sidecar");
592        assert_eq!(paths(&scoped), path_set(&["records/contacts/sarah.md"]));
593
594        // Unscoped: the same query DOES walk every layer and trips over the
595        // corrupt sidecar — proving the corrupt file is real and that only the
596        // layer scope spared the scoped read from reading it.
597        let unscoped = Query::new()
598            .with_where("domain", "acme.com")
599            .execute(&store);
600        assert!(
601            unscoped.is_err(),
602            "an unscoped where query reads every sidecar, including the corrupt one"
603        );
604    }
605
606    #[test]
607    fn execute_full_composition_type_layer_where() {
608        let contacts = [
609            rec(
610                "records/contacts/match.md",
611                "contact",
612                &[("city", Value::String("denver".into()))],
613            ),
614            rec(
615                "records/contacts/wrong-city.md",
616                "contact",
617                &[("city", Value::String("austin".into()))],
618            ),
619        ];
620        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
621
622        let got = Query::new()
623            .with_type("contact")
624            .with_layer(Layer::Records)
625            .with_where("city", "denver")
626            .execute(&store)
627            .unwrap();
628        assert_eq!(paths(&got), path_set(&["records/contacts/match.md"]));
629
630        // The same query scoped to the wrong layer yields nothing, proving the
631        // layer predicate is live in the composed path.
632        let wrong_layer = Query::new()
633            .with_type("contact")
634            .with_layer(Layer::Sources)
635            .with_where("city", "denver")
636            .execute(&store)
637            .unwrap();
638        assert!(wrong_layer.is_empty());
639    }
640
641    #[test]
642    fn execute_bare_query_selects_no_sidecar() {
643        // A fully bare query (no type, no where, no layer) constrains nothing
644        // and has no selective candidate set, so it returns empty WITHOUT
645        // resolving to every record in the store.
646        let contacts = [rec("records/contacts/sarah.md", "contact", &[])];
647        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
648
649        let got = Query::new().execute(&store).unwrap();
650        assert!(
651            got.is_empty(),
652            "an unconstrained query resolves to empty, not to every record"
653        );
654    }
655
656    #[test]
657    fn execute_layer_only_enumerates_that_layer() {
658        // Regression (finding #47): a layer-only query (`--in <layer>` with no
659        // type/where) must enumerate that layer's records, not silently return
660        // []. Records live in two layers; the scope keeps only the named one.
661        let contacts = [rec("records/contacts/sarah.md", "contact", &[])];
662        let emails = [rec("sources/emails/e.md", "email", &[])];
663        let (_dir, store) =
664            store_with_sidecars(&[("records/contacts", &contacts), ("sources/emails", &emails)]);
665
666        let records = Query::new()
667            .with_layer(Layer::Records)
668            .execute(&store)
669            .unwrap();
670        assert_eq!(
671            paths(&records),
672            path_set(&["records/contacts/sarah.md"]),
673            "a layer-only query enumerates that layer, excluding other layers"
674        );
675
676        let sources = Query::new()
677            .with_layer(Layer::Sources)
678            .execute(&store)
679            .unwrap();
680        assert_eq!(
681            paths(&sources),
682            path_set(&["sources/emails/e.md"]),
683            "the sources-layer scope returns the sources records"
684        );
685    }
686
687    #[test]
688    fn execute_type_finds_records_filed_outside_canonical_layer() {
689        // Regression (finding #42): the folder layout is convention, not
690        // enforcement (SPEC). A `contact` filed under sources/ and a custom
691        // `screenshot` that only ever lives under sources/ must both be found
692        // by `--type`, which filters on the frontmatter type — not the type's
693        // canonical layer.
694        let source_contacts = [rec("sources/foo/jane.md", "contact", &[])];
695        let record_contacts = [rec("records/contacts/sarah.md", "contact", &[])];
696        let screenshots = [rec("sources/screenshots/shot1.md", "screenshot", &[])];
697        let (_dir, store) = store_with_sidecars(&[
698            ("sources/foo", &source_contacts),
699            ("records/contacts", &record_contacts),
700            ("sources/screenshots", &screenshots),
701        ]);
702
703        // `--type contact` returns BOTH the canonical and the non-canonical-
704        // layer record (jane under sources/, sarah under records/).
705        let contacts = Query::new().with_type("contact").execute(&store).unwrap();
706        assert_eq!(
707            paths(&contacts),
708            path_set(&["records/contacts/sarah.md", "sources/foo/jane.md"]),
709            "a type query spans every layer the type is filed under"
710        );
711
712        // A custom type that only ever lives under sources/ is still found.
713        let shots = Query::new()
714            .with_type("screenshot")
715            .execute(&store)
716            .unwrap();
717        assert_eq!(
718            paths(&shots),
719            path_set(&["sources/screenshots/shot1.md"]),
720            "a type filed entirely under sources/ is visible to --type"
721        );
722
723        // `--type contact --in sources` resolves to the sources-layer contact,
724        // not [] (the previously-dead --type/--in combination).
725        let in_sources = Query::new()
726            .with_type("contact")
727            .with_layer(Layer::Sources)
728            .execute(&store)
729            .unwrap();
730        assert_eq!(
731            paths(&in_sources),
732            path_set(&["sources/foo/jane.md"]),
733            "--type X --in <layer> returns the records of that type under the layer"
734        );
735
736        // And `--type contact --in records` keeps only the records-layer one.
737        let in_records = Query::new()
738            .with_type("contact")
739            .with_layer(Layer::Records)
740            .execute(&store)
741            .unwrap();
742        assert_eq!(
743            paths(&in_records),
744            path_set(&["records/contacts/sarah.md"]),
745            "the layer scope confines a type query to the named layer"
746        );
747    }
748
749    #[test]
750    fn execute_tag_membership_via_where() {
751        let mut urgent = rec("records/tasks/t1.md", "task", &[]);
752        urgent.tags = vec!["urgent".into(), "ops".into()];
753        let mut calm = rec("records/tasks/t2.md", "task", &[]);
754        calm.tags = vec!["ops".into()];
755        let recs = [urgent, calm];
756        let (_dir, store) = store_with_sidecars(&[("records/tasks", &recs)]);
757
758        let got = Query::new()
759            .with_type("task")
760            .with_where("tags", "urgent")
761            .execute(&store)
762            .unwrap();
763        assert_eq!(
764            paths(&got),
765            path_set(&["records/tasks/t1.md"]),
766            "tags match on membership: only the record carrying the tag matches"
767        );
768    }
769
770    #[test]
771    fn execute_matches_numeric_and_bool_fields_from_string_predicate() {
772        let recs = [
773            rec(
774                "records/invoices/paid.md",
775                "invoice",
776                &[
777                    ("amount", Value::Number(42.into())),
778                    ("paid", Value::Bool(true)),
779                ],
780            ),
781            rec(
782                "records/invoices/unpaid.md",
783                "invoice",
784                &[
785                    ("amount", Value::Number(99.into())),
786                    ("paid", Value::Bool(false)),
787                ],
788            ),
789        ];
790        let (_dir, store) = store_with_sidecars(&[("records/invoices", &recs)]);
791
792        let by_amount = Query::new()
793            .with_type("invoice")
794            .with_where("amount", "42")
795            .execute(&store)
796            .unwrap();
797        assert_eq!(
798            paths(&by_amount),
799            path_set(&["records/invoices/paid.md"]),
800            "a JSON number matches the string form of the predicate"
801        );
802
803        let by_paid = Query::new()
804            .with_type("invoice")
805            .with_where("paid", "true")
806            .execute(&store)
807            .unwrap();
808        assert_eq!(
809            paths(&by_paid),
810            path_set(&["records/invoices/paid.md"]),
811            "a JSON bool matches \"true\"/\"false\""
812        );
813    }
814
815    #[test]
816    fn execute_honors_last_write_wins_in_sidecar() {
817        // Two JSONL lines for the same path: the later supersedes the earlier
818        // (read_type_index applies last-write-wins). A query on the superseding
819        // field must match, and one on the superseded field must not.
820        let dir = TempDir::new().unwrap();
821        let root = dir.path();
822        fs::write(root.join("DB.md"), DB_MD).unwrap();
823        let folder = root.join("records/contacts");
824        fs::create_dir_all(&folder).unwrap();
825
826        let old = rec(
827            "records/contacts/sarah.md",
828            "contact",
829            &[("status", Value::String("lead".into()))],
830        );
831        let new = rec(
832            "records/contacts/sarah.md",
833            "contact",
834            &[("status", Value::String("customer".into()))],
835        );
836        fs::write(
837            folder.join("index.jsonl"),
838            format!("{}\n{}\n", jsonl_line(&old), jsonl_line(&new)),
839        )
840        .unwrap();
841        let store = Store::open(root).unwrap();
842
843        let superseding = Query::new()
844            .with_type("contact")
845            .with_where("status", "customer")
846            .execute(&store)
847            .unwrap();
848        assert_eq!(superseding.len(), 1, "the superseding line's value matches");
849
850        let superseded = Query::new()
851            .with_type("contact")
852            .with_where("status", "lead")
853            .execute(&store)
854            .unwrap();
855        assert!(
856            superseded.is_empty(),
857            "the superseded line's value no longer matches after last-write-wins"
858        );
859    }
860
861    #[test]
862    fn execute_returns_full_records_not_just_paths() {
863        // The contract returns full IndexRecords straight from the sidecar:
864        // summary, tags, links, and fields must survive the round-trip.
865        let mut r = rec(
866            "records/contacts/sarah.md",
867            "contact",
868            &[("company", Value::String("acme".into()))],
869        );
870        r.summary = "Renewal champion".into();
871        r.tags = vec!["vip".into()];
872        r.links = vec!["records/profiles/sarah-chen.md".into()];
873        let recs = [r];
874        let (_dir, store) = store_with_sidecars(&[("records/contacts", &recs)]);
875
876        let got = Query::new().with_type("contact").execute(&store).unwrap();
877        assert_eq!(got.len(), 1);
878        let only = &got[0];
879        assert_eq!(only.summary, "Renewal champion");
880        assert_eq!(only.tags, vec!["vip".to_string()]);
881        assert_eq!(
882            only.links,
883            vec!["records/profiles/sarah-chen.md".to_string()]
884        );
885        assert_eq!(
886            only.fields.get("company"),
887            Some(&Value::String("acme".into())),
888            "type-specific fields come back verbatim for on-demand use"
889        );
890    }
891
892    // ── Pure matcher logic (no store I/O) ────────────────────────────────────
893
894    #[test]
895    fn record_matches_where_on_typed_columns() {
896        let mut r = rec("records/contacts/x.md", "contact", &[]);
897        r.summary = "hello".into();
898
899        assert!(record_matches_where(&r, "type", "contact"));
900        assert!(!record_matches_where(&r, "type", "company"));
901        assert!(record_matches_where(&r, "summary", "hello"));
902        assert!(!record_matches_where(&r, "summary", "goodbye"));
903        assert!(record_matches_where(&r, "path", "records/contacts/x.md"));
904        assert!(!record_matches_where(&r, "path", "records/contacts/y.md"));
905    }
906
907    #[test]
908    fn record_matches_where_on_timestamps_uses_rfc3339() {
909        let mut r = rec("records/meetings/m.md", "meeting", &[]);
910        let ts = chrono::DateTime::parse_from_rfc3339("2026-05-29T12:00:00+00:00").unwrap();
911        r.created = Some(ts);
912
913        assert!(record_matches_where(
914            &r,
915            "created",
916            "2026-05-29T12:00:00+00:00"
917        ));
918        assert!(!record_matches_where(
919            &r,
920            "created",
921            "2026-05-29T13:00:00+00:00"
922        ));
923        // `updated` is unset → never matches, even the same instant.
924        assert!(!record_matches_where(
925            &r,
926            "updated",
927            "2026-05-29T12:00:00+00:00"
928        ));
929    }
930
931    #[test]
932    fn record_matches_where_timestamp_z_and_offset_spellings_are_equal() {
933        // Regression: the in-memory filter compared `to_rfc3339()` (always the
934        // `+00:00` form) to the raw predicate string, so a `Z`-spelled query of
935        // the identical instant silently failed — and disagreed with the
936        // `Store::find_by_where_in` sidecar pre-filter (instant-based),
937        // dropping real matches. Both spellings must compare equal now.
938        let mut stored_z = rec("records/meetings/m.md", "meeting", &[]);
939        stored_z.created =
940            Some(chrono::DateTime::parse_from_rfc3339("2026-05-29T12:00:00Z").unwrap());
941        assert!(record_matches_where(
942            &stored_z,
943            "created",
944            "2026-05-29T12:00:00Z"
945        ));
946        assert!(record_matches_where(
947            &stored_z,
948            "created",
949            "2026-05-29T12:00:00+00:00"
950        ));
951
952        // Stored as `+00:00`, queried as `Z` — this is the spelling pair that
953        // failed before the fix.
954        let mut stored_offset = rec("records/meetings/n.md", "meeting", &[]);
955        stored_offset.created =
956            Some(chrono::DateTime::parse_from_rfc3339("2026-05-29T12:00:00+00:00").unwrap());
957        assert!(record_matches_where(
958            &stored_offset,
959            "created",
960            "2026-05-29T12:00:00Z"
961        ));
962
963        // A different instant still does not match; an unparseable value is false.
964        assert!(!record_matches_where(
965            &stored_z,
966            "created",
967            "2026-05-29T13:00:00Z"
968        ));
969        assert!(!record_matches_where(
970            &stored_z,
971            "created",
972            "not-a-timestamp"
973        ));
974    }
975
976    #[test]
977    fn record_matches_where_absent_field_is_false() {
978        let r = rec("records/contacts/x.md", "contact", &[]);
979        assert!(
980            !record_matches_where(&r, "nonexistent", "anything"),
981            "an absent frontmatter key never matches"
982        );
983    }
984
985    #[test]
986    fn json_value_matches_covers_scalars_and_arrays() {
987        assert!(json_value_matches(&Value::String("acme".into()), "acme"));
988        assert!(!json_value_matches(&Value::String("acme".into()), "globex"));
989
990        assert!(json_value_matches(&Value::Number(42.into()), "42"));
991        assert!(!json_value_matches(&Value::Number(42.into()), "43"));
992
993        assert!(json_value_matches(&Value::Bool(true), "true"));
994        assert!(json_value_matches(&Value::Bool(false), "false"));
995        assert!(!json_value_matches(&Value::Bool(true), "false"));
996
997        let arr = Value::Array(vec![Value::String("a".into()), Value::String("b".into())]);
998        assert!(json_value_matches(&arr, "b"), "array matches on membership");
999        assert!(!json_value_matches(&arr, "c"));
1000    }
1001
1002    #[test]
1003    fn json_value_matches_null_and_object_never_match() {
1004        assert!(!json_value_matches(&Value::Null, ""));
1005        assert!(!json_value_matches(&Value::Null, "null"));
1006        let obj = serde_json::json!({"k": "v"});
1007        assert!(!json_value_matches(&obj, "v"));
1008    }
1009
1010    #[test]
1011    fn record_in_layer_keys_off_first_path_component() {
1012        let s = rec("sources/emails/e.md", "email", &[]);
1013        let r = rec("records/contacts/c.md", "contact", &[]);
1014        // A conclusion record (the former wiki-page) lives in the records layer.
1015        let c = rec("records/profiles/p.md", "profile", &[]);
1016
1017        assert!(record_in_layer(&s, Layer::Sources));
1018        assert!(!record_in_layer(&s, Layer::Records));
1019        assert!(record_in_layer(&r, Layer::Records));
1020        assert!(!record_in_layer(&r, Layer::Sources));
1021        assert!(record_in_layer(&c, Layer::Records));
1022        assert!(!record_in_layer(&c, Layer::Sources));
1023    }
1024
1025    #[test]
1026    fn filter_candidates_skips_already_applied_where_clause() {
1027        // Simulate the find_by_where path: the first clause is "already applied"
1028        // by the sidecar reader, so filter_candidates must skip it and only
1029        // enforce the remaining clause. A record satisfying only the (skipped)
1030        // first clause but NOT the second must still be dropped.
1031        let q = Query::new()
1032            .with_where("company", "acme")
1033            .with_where("status", "active");
1034
1035        let keep = rec(
1036            "records/contacts/keep.md",
1037            "contact",
1038            &[
1039                ("company", Value::String("acme".into())),
1040                ("status", Value::String("active".into())),
1041            ],
1042        );
1043        let drop = rec(
1044            "records/contacts/drop.md",
1045            "contact",
1046            &[
1047                ("company", Value::String("acme".into())),
1048                ("status", Value::String("churned".into())),
1049            ],
1050        );
1051
1052        let out = q.filter_candidates(vec![keep, drop], false, 1);
1053        assert_eq!(
1054            paths(&out),
1055            path_set(&["records/contacts/keep.md"]),
1056            "the second clause is enforced even when the first is pre-applied"
1057        );
1058    }
1059
1060    #[test]
1061    fn filter_candidates_enforces_type_when_not_preapplied() {
1062        // When the candidate set did NOT come from find_by_type (type_applied =
1063        // false), filter_candidates must still drop records of the wrong type.
1064        let q = Query::new().with_type("contact");
1065        let contact = rec("records/contacts/c.md", "contact", &[]);
1066        let company = rec("records/companies/co.md", "company", &[]);
1067
1068        let out = q.filter_candidates(vec![contact, company], false, 0);
1069        assert_eq!(paths(&out), path_set(&["records/contacts/c.md"]));
1070    }
1071
1072    /// Local guard: the test fixtures write sidecars under the same canonical
1073    /// folders the store reader derives, so a `with_type` query finds them.
1074    /// If this drifts, the integration tests above silently weaken — assert the
1075    /// convention explicitly.
1076    #[test]
1077    fn fixture_canonical_folders_match_store_expectations() {
1078        let contacts = [rec("records/contacts/x.md", "contact", &[])];
1079        let (_dir, store) = store_with_sidecars(&[("records/contacts", &contacts)]);
1080        // `contact` records live at records/contacts/ — the same folder the
1081        // fixture wrote — so the type read is non-empty.
1082        let got = store.find_by_type("contact").unwrap();
1083        assert_eq!(got.len(), 1, "fixture folder == store's canonical folder");
1084    }
1085}