obj-db 1.0.2 - Docs.rs

//! M8 #65 — `Query<T>` builder integration tests.
//!
//! Each acceptance criterion of issue #65 gets a happy-path test and
//! an empty-result test. Later commits extend this file:
//!
//! - #66 — `Query::sort_by` with bounded sort buffer.
//! - #67 — `Query::count` no-decode fast path.
//!
//! The hand-impl `Document` for `Order` lives here (no derive macro
//! ships until M9). Indexes are declared via the
//! [`obj::Document::indexes`] override so `.index_range` has
//! something to walk.

#![forbid(unsafe_code)]

use std::ops::Bound;

use obj::{Db, Document, IndexSpec};
use obj_core::codec::Dynamic;
use serde::{Deserialize, Serialize};
use tempfile::TempDir;

/// Hand-written `Document` matching design.md's `Order` example.
/// Carries one indexed field (`placed_at`, `Standard` kind) so the
/// `.index_range` path has something to walk.
///
/// The seed helpers in this file derive `customer_id` and
/// `placed_at` from the insertion index, which is what the
/// assertions rely on.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
struct Order {
    /// design.md: `Id` field referencing the customer record.
    /// Not indexed in this file.
    customer_id: u64,
    /// design.md: order status (pending/shipped/...). Used by the
    /// filter tests as the predicate field.
    status: String,
    /// design.md: timestamp the order was placed. Indexed.
    placed_at: u64,
}

impl Document for Order {
    const COLLECTION: &'static str = "orders";
    const VERSION: u32 = 1;

    /// Declares the single `Standard` index, named `placed_at`, over
    /// the `placed_at` field.
    fn indexes() -> Vec<IndexSpec> {
        let by_placed_at = IndexSpec::standard("placed_at", "placed_at").expect("standard");
        vec![by_placed_at]
    }
}

/// Open a brand-new file-backed `Db` inside its own temp directory.
///
/// The `TempDir` is handed back alongside the `Db`; callers must keep
/// it bound for as long as they use the database so the backing file
/// is not removed underneath it.
fn fresh_db() -> (Db, TempDir) {
    let tmp = TempDir::new().expect("tmp");
    let db_file = tmp.path().join("query.obj");
    (Db::open(&db_file).expect("open"), tmp)
}

/// Insert `n` orders, one auto-committed insert per doc.
///
/// The `i`-th order has `customer_id = placed_at = i`, which makes the
/// `placed_at` index dense over `0..n`. Even `i` gets status
/// `"pending"`, odd `i` gets `"shipped"`, giving filters a 50/50
/// split to exercise.
fn seed_orders(db: &Db, n: u64) {
    for idx in 0..n {
        let status = match idx % 2 {
            0 => "pending",
            _ => "shipped",
        };
        let order = Order {
            customer_id: idx,
            status: status.to_owned(),
            placed_at: idx,
        };
        let _ = db.insert(order).expect("seed insert");
    }
}

#[test]
fn db_all_returns_every_doc() {
    // Happy path: five inserts, five docs back.
    let (db, _dir) = fresh_db();
    seed_orders(&db, 5);
    let fetched: Vec<Order> = db.all::<Order>().expect("all");
    let fetched_len = fetched.len();
    assert_eq!(fetched_len, 5, "Db::all must return every inserted doc");
}

#[test]
fn db_all_on_empty_collection_is_collection_not_found() {
    // Nothing has been inserted, so the "orders" collection was never
    // registered. `Db::all` goes through a read txn, and the read
    // side never lazy-creates the catalog row — the documented
    // outcome is therefore `CollectionNotFound`.
    let (db, _dir) = fresh_db();
    let outcome = db.all::<Order>();
    let err = outcome.expect_err("all on absent collection");
    let is_not_found =
        matches!(err, obj::Error::CollectionNotFound { ref name } if name == "orders");
    assert!(is_not_found, "expected CollectionNotFound, got {err:?}");
}

#[test]
fn query_filter_returns_matching_subset() {
    let (db, _dir) = fresh_db();
    seed_orders(&db, 10);
    // Build the query first, then run the terminator separately.
    let only_pending = db.query::<Order>().filter(|o| o.status == "pending");
    let pending: Vec<Order> = only_pending.fetch().expect("filter fetch");
    assert_eq!(pending.len(), 5, "5 even-indexed docs match 'pending'");
    for order in &pending {
        assert!(order.status == "pending");
    }
}

#[test]
fn query_filter_empty_result_is_empty_vec() {
    let (db, _dir) = fresh_db();
    seed_orders(&db, 4);
    // Seeded statuses are only "pending" / "shipped", so this
    // predicate rejects every doc and the fetch must still succeed.
    let only_archived = db.query::<Order>().filter(|o| o.status == "archived");
    let archived: Vec<Order> = only_archived.fetch().expect("filter fetch");
    assert!(archived.is_empty(), "no doc has status 'archived'");
}

#[test]
fn query_multiple_filters_compose_with_and() {
    let (db, _dir) = fresh_db();
    seed_orders(&db, 20);
    // Two chained filters must AND together: status == "pending"
    // (even indices) and placed_at >= 10. With placed_at equal to the
    // insertion index, the survivors are {10, 12, 14, 16, 18}.
    let query = db
        .query::<Order>()
        .filter(|o| o.status == "pending")
        .filter(|o| o.placed_at >= 10);
    let hits: Vec<Order> = query.fetch().expect("multi-filter fetch");
    assert_eq!(hits.len(), 5);
    for hit in &hits {
        assert!(hit.status == "pending" && hit.placed_at >= 10);
    }
}

#[test]
fn query_limit_caps_result_set() {
    // 100 docs available, limit of 10 — exactly 10 come back.
    let (db, _dir) = fresh_db();
    seed_orders(&db, 100);
    let limited = db.query::<Order>().limit(10);
    let first_ten: Vec<Order> = limited.fetch().expect("limit fetch");
    assert_eq!(first_ten.len(), 10);
}

#[test]
fn query_limit_zero_is_empty() {
    // A zero limit is legal and yields an empty result, not an error.
    let (db, _dir) = fresh_db();
    seed_orders(&db, 5);
    let limited = db.query::<Order>().limit(0);
    let fetched: Vec<Order> = limited.fetch().expect("limit 0 fetch");
    assert!(fetched.is_empty());
}

#[test]
fn query_index_range_walks_index_slice() {
    let (db, _dir) = fresh_db();
    seed_orders(&db, 100);
    // Half-open window [40, 60) on placed_at — 20 docs.
    let window = (
        Bound::Included(Dynamic::U64(40)),
        Bound::Excluded(Dynamic::U64(60)),
    );
    let mid: Vec<Order> = db
        .query::<Order>()
        .index_range("placed_at", window)
        .expect("index_range")
        .fetch()
        .expect("index_range fetch");
    assert_eq!(mid.len(), 20);
    for order in &mid {
        assert!(order.placed_at >= 40 && order.placed_at < 60);
    }
}

#[test]
fn query_index_range_empty_window_is_empty() {
    let (db, _dir) = fresh_db();
    seed_orders(&db, 50);
    // Every seeded placed_at is below 50, so [1000, 2000) holds
    // nothing.
    let window = (
        Bound::Included(Dynamic::U64(1_000)),
        Bound::Excluded(Dynamic::U64(2_000)),
    );
    let fetched: Vec<Order> = db
        .query::<Order>()
        .index_range("placed_at", window)
        .expect("index_range")
        .fetch()
        .expect("index_range fetch");
    assert!(fetched.is_empty());
}

#[test]
fn query_index_range_with_filter() {
    let (db, _dir) = fresh_db();
    seed_orders(&db, 50);
    // [10, 30) covers 20 docs; keeping only the even-indexed
    // ("pending") ones leaves 10.
    let window = (
        Bound::Included(Dynamic::U64(10)),
        Bound::Excluded(Dynamic::U64(30)),
    );
    let ranged = db
        .query::<Order>()
        .index_range("placed_at", window)
        .expect("index_range");
    let hits: Vec<Order> = ranged
        .filter(|o| o.status == "pending")
        .fetch()
        .expect("fetch");
    assert_eq!(hits.len(), 10);
}

// --- M8 #66: `sort_by` + bounded sort buffer ------------------------

/// Seed `n` docs whose `placed_at` is the REVERSE of insertion order
/// (the `i`-th insert gets `placed_at = n - i - 1`) so the index/sort
/// comparison is non-trivial. Every doc has status `"pending"` and
/// `customer_id = i`.
///
/// Nothing is returned; assertions re-read the docs through the `Db`.
fn seed_reversed(db: &Db, n: u64) {
    for i in 0..n {
        let order = Order {
            customer_id: i,
            status: "pending".to_owned(),
            // `i < n` inside the loop, so this never underflows.
            placed_at: n - i - 1,
        };
        let _ = db.insert(order).expect("seed insert");
    }
}

#[test]
fn query_sort_by_int_field_ascends() {
    let (db, _dir) = fresh_db();
    // Reverse-seeded so the sorted order differs from insertion order.
    seed_reversed(&db, 100);
    let by_placed_at = db.query::<Order>().sort_by(|o| Dynamic::U64(o.placed_at));
    let sorted: Vec<Order> = by_placed_at.fetch().expect("sort fetch");
    assert_eq!(sorted.len(), 100);
    // Every adjacent pair must be non-decreasing.
    for pair in sorted.windows(2) {
        let (prev, next) = (&pair[0], &pair[1]);
        assert!(
            prev.placed_at <= next.placed_at,
            "ascending sort_by violated at {:?} / {:?}",
            prev.placed_at,
            next.placed_at,
        );
    }
    // Length is pinned to 100 above, so first/last are indices 0/99.
    assert_eq!(sorted.first().map(|o| o.placed_at), Some(0));
    assert_eq!(sorted.last().map(|o| o.placed_at), Some(99));
}

#[test]
fn query_sort_by_string_field_ascends() {
    let (db, _dir) = fresh_db();
    // `seed_orders` alternates two status strings, so sorting by
    // status splits the result into two runs of five.
    seed_orders(&db, 10);
    let by_status = db
        .query::<Order>()
        .sort_by(|o| Dynamic::String(o.status.clone()));
    let sorted: Vec<Order> = by_status.fetch().expect("sort fetch");
    assert_eq!(sorted.len(), 10);
    // "pending" < "shipped" lexicographically: five "pending" first,
    // then five "shipped".
    let expected_statuses = std::iter::repeat("pending")
        .take(5)
        .chain(std::iter::repeat("shipped").take(5));
    for (doc, expected) in sorted.iter().zip(expected_statuses) {
        assert_eq!(doc.status, expected);
    }
}

#[test]
fn query_filter_sort_by_limit_returns_top_n_sorted() {
    let (db, _dir) = fresh_db();
    seed_reversed(&db, 100);
    let query = db
        .query::<Order>()
        .filter(|o| o.placed_at >= 50)
        .sort_by(|o| Dynamic::U64(o.placed_at))
        .limit(10);
    let top: Vec<Order> = query.fetch().expect("fetch");
    // The filter leaves 50 docs (placed_at in 50..=99); the first 10
    // in ascending order are placed_at 50 through 59.
    assert_eq!(top.len(), 10);
    for (doc, expected_placed_at) in top.iter().zip(50u64..) {
        assert_eq!(doc.placed_at, expected_placed_at);
    }
}

#[test]
fn query_sort_buffer_exceeded_fires_below_cap_then_passes_above() {
    let (db, _dir) = fresh_db();
    // 250 docs keeps the test in the millisecond range. The overflow
    // is driven against an EXPLICIT `sort_buffer_limit(100)`, far
    // below the default MAX_SORT_BUFFER = 100_000, so there is no
    // need to populate up to the default ceiling here. Behaviour at
    // the full default cap is covered by the slower, batched
    // `query_sort_buffer_exceeded_default_fires_at_200k` test
    // further down.
    seed_orders(&db, 250);

    let too_small = db
        .query::<Order>()
        .sort_by(|o| Dynamic::U64(o.placed_at))
        .sort_buffer_limit(100);
    let err = too_small
        .fetch()
        .expect_err("explicit cap of 100 must overflow at 250 docs");
    let overflowed = matches!(err, obj::Error::SortBufferExceeded { limit: 100 });
    assert!(
        overflowed,
        "expected SortBufferExceeded{{ limit: 100 }}, got {err:?}",
    );

    // A cap above the doc count (300 > 250) lets the sort complete.
    let roomy = db
        .query::<Order>()
        .sort_by(|o| Dynamic::U64(o.placed_at))
        .sort_buffer_limit(300);
    let sorted: Vec<Order> = roomy.fetch().expect("raised cap fetch");
    assert_eq!(sorted.len(), 250);
    for (doc, expected_placed_at) in sorted.iter().zip(0u64..) {
        assert_eq!(doc.placed_at, expected_placed_at);
    }
}

#[test]
fn query_sort_buffer_default_constant_is_one_hundred_thousand() {
    // Pin the default ceiling at the public API surface: any change
    // to MAX_SORT_BUFFER then shows up as a failing test.
    let default_cap = obj::MAX_SORT_BUFFER;
    assert_eq!(default_cap, 100_000);
}

// --- M8 #67: `Query::count` + no-decode fast path -------------------

#[test]
fn query_count_full_matches_db_all_len() {
    // An unfiltered count must agree with the length of `Db::all`.
    let (db, _dir) = fresh_db();
    seed_orders(&db, 42);
    let counted = db.query::<Order>().count().expect("count");
    let everything = db.all::<Order>().expect("all");
    assert_eq!(counted, 42);
    assert_eq!(counted, everything.len() as u64);
}

#[test]
fn query_count_with_filter_matches_filtered_fetch_len() {
    let (db, _dir) = fresh_db();
    seed_orders(&db, 100);
    let counting = db.query::<Order>().filter(|o| o.status == "pending");
    let counted = counting.count().expect("count with filter");
    // The test does not reuse `counting` for the fetch (`count`
    // takes &self, `fetch` consumes the builder); an equivalent
    // query is built from scratch instead.
    let fetching = db.query::<Order>().filter(|o| o.status == "pending");
    let docs: Vec<Order> = fetching.fetch().expect("fetch with filter");
    assert_eq!(counted, 50, "50 even-indexed docs match 'pending'");
    assert_eq!(counted, docs.len() as u64);
}

#[test]
fn query_count_with_limit_returns_min_total_limit() {
    // With 100 docs, `count` must report min(total, limit).
    let (db, _dir) = fresh_db();
    seed_orders(&db, 100);

    let below_total = db.query::<Order>().limit(10);
    let n_under = below_total.count().expect("count under");
    assert_eq!(n_under, 10);

    let above_total = db.query::<Order>().limit(500);
    let n_over = above_total.count().expect("count over");
    assert_eq!(n_over, 100, "limit > total returns total");
}

#[test]
fn query_count_with_sort_and_limit_matches_filtered_total_min_limit() {
    let (db, _dir) = fresh_db();
    seed_orders(&db, 100);
    // The filter keeps 50 docs; a limit of 5 caps the count at 5.
    let query = db
        .query::<Order>()
        .filter(|o| o.status == "pending")
        .sort_by(|o| Dynamic::U64(o.placed_at))
        .limit(5);
    let counted = query.count().expect("count");
    assert_eq!(counted, 5);
}

#[test]
fn query_count_can_be_called_then_fetch_via_separate_builders() {
    // Per the issue, `count` borrows the builder (`&self`) and so
    // does not consume it. The docs note that closures are not
    // reusable across two terminators by construction, so the
    // practical contract is "count without consuming": a caller who
    // also wants the docs re-derives an equivalent builder for fetch.
    let (db, _dir) = fresh_db();
    seed_orders(&db, 10);
    let counting = db.query::<Order>().filter(|o| o.placed_at >= 5);
    let counted = counting.count().expect("count");
    assert_eq!(counted, 5);
    // `counting` is still alive here; a parallel builder serves the
    // fetch side.
    let fetching = db.query::<Order>().filter(|o| o.placed_at >= 5);
    let docs: Vec<Order> = fetching.fetch().expect("fetch");
    assert_eq!(docs.len() as u64, counted);
    // Explicit drop: proves the builder survived the `count` call.
    drop(counting);
}

#[test]
fn query_count_index_range_fast_path() {
    let (db, _dir) = fresh_db();
    seed_orders(&db, 100);
    // Count over an index window with no filter attached.
    let window = (
        Bound::Included(Dynamic::U64(30)),
        Bound::Excluded(Dynamic::U64(70)),
    );
    let ranged = db
        .query::<Order>()
        .index_range("placed_at", window)
        .expect("index_range");
    let counted = ranged.count().expect("count");
    assert_eq!(counted, 40, "[30, 70) covers 40 docs");
}

#[test]
fn query_count_empty_collection_is_collection_not_found() {
    let (db, _dir) = fresh_db();
    // With no insert ever run, the read-side collection lookup
    // fails — the same shape `Db::all` shows on an absent collection.
    let unseeded = db.query::<Order>();
    let err = unseeded.count().expect_err("count on absent collection");
    let is_not_found =
        matches!(err, obj::Error::CollectionNotFound { ref name } if name == "orders");
    assert!(is_not_found, "expected CollectionNotFound, got {err:?}");
}

// --- M8 follow-up #71: Db::iter_all streaming iterator -------------

#[test]
fn iter_all_yields_docs_one_at_a_time() {
    // Populate 1 000 docs in 100-doc transactions (per M6 #52 — keeps
    // the WAL under its default 64 MiB), then drive `iter_all` to
    // exhaustion and confirm the yielded set matches the inserted
    // set. The iterator's internal buffer is fixed at
    // ITER_ALL_BATCH = 256, so a 1 000-doc collection forces at
    // least 4 refill batches — exercising the resumption logic.
    let (db, _dir) = fresh_db();
    let total: u64 = 1_000;
    let batch: u64 = 100;
    let mut inserted: u64 = 0;
    while inserted < total {
        // Clamp so the final batch never runs past `total`.
        let end = (inserted + batch).min(total);
        db.transaction(|tx| {
            let coll = tx.collection::<Order>()?;
            for i in inserted..end {
                let _ = coll.insert(Order {
                    customer_id: i,
                    status: "pending".to_owned(),
                    placed_at: i,
                })?;
            }
            Ok(())
        })
        .expect("batch insert");
        inserted = end;
    }

    let iter = db.iter_all::<Order>().expect("iter_all");
    // u64 -> usize narrowing; 1 000 fits on every supported target.
    let cap = usize::try_from(total).expect("total fits in usize on test targets");
    let mut collected: Vec<Order> = Vec::with_capacity(cap);
    let mut ids: Vec<u64> = Vec::with_capacity(cap);
    for step in iter {
        // Each step is fallible; any per-item error fails the test.
        let (id, doc) = step.expect("per-step");
        ids.push(id.get());
        collected.push(doc);
    }
    assert_eq!(collected.len() as u64, total, "every doc must be yielded");
    // Primary tree is keyed by Id (BE) — yielded order is ascending
    // by Id, which is monotone-allocated, so customer_id (which we
    // mirror to i in insertion order) also ends up ascending.
    for (doc, expected_customer) in collected.iter().zip(0u64..) {
        assert_eq!(doc.customer_id, expected_customer);
    }
    // Structural-size check: `size_of` measures only the iterator's
    // inline struct, not any heap allocation behind it. The assertion
    // therefore guards against the struct itself embedding storage
    // that scales with the collection; the bounded-buffer behaviour
    // (refills of ITER_ALL_BATCH) is a property of the implementation
    // that this check does not, by itself, prove.
    assert!(
        std::mem::size_of::<obj::IterAll<'_, Order>>() < 4096,
        "IterAll struct should be small (independent of collection size)",
    );
    // Sanity check: ids are unique (the iterator yields each doc
    // exactly once even though refills cross batch boundaries).
    let unique: std::collections::HashSet<u64> = ids.iter().copied().collect();
    assert_eq!(unique.len() as u64, total, "every yielded id is distinct");
}

#[test]
fn iter_all_on_empty_collection_errors_at_construction() {
    // Same contract as pre-M8 `Db::all`: an absent collection is
    // reported when the iterator is built, not on the first `next()`.
    let (db, _dir) = fresh_db();
    let construction = db.iter_all::<Order>();
    let err = construction.expect_err("iter_all on absent");
    let is_not_found =
        matches!(err, obj::Error::CollectionNotFound { ref name } if name == "orders");
    assert!(is_not_found, "expected CollectionNotFound, got {err:?}");
}

#[test]
fn db_all_now_collects_iter_all() {
    // In the #71 follow-up `Db::all` is a one-line shim over
    // `iter_all`; a 5-doc workload checks that both paths agree.
    let (db, _dir) = fresh_db();
    seed_orders(&db, 5);
    let via_all: Vec<Order> = db.all::<Order>().expect("all");
    assert_eq!(via_all.len(), 5);
    // Drain the streaming iterator, keeping only the doc half of
    // each `(id, doc)` pair.
    let stream = db.iter_all::<Order>().expect("iter_all");
    let via_iter: Vec<Order> = stream
        .map(|step| {
            let (_id, doc) = step.expect("step");
            doc
        })
        .collect();
    assert_eq!(via_iter, via_all);
}

// --- M8 follow-up #72: count_distinct_ids for Each indexes ---------

/// Tagged doc with an `Each` index over `tags`. The `Each` kind lets
/// us exercise the entry-vs-distinct-id divergence (a single doc may
/// contribute multiple entries under different tag keys).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
struct Tagged {
    /// Free-form label; the tests set it to `"doc{i}"` / `"dup"` and
    /// never query on it.
    name: String,
    /// Tag list covered by the `by_tag` `Each` index.
    tags: Vec<String>,
}

impl Document for Tagged {
    const COLLECTION: &'static str = "tagged";
    const VERSION: u32 = 1;

    /// Declares the single `Each` index, named `by_tag`, over the
    /// `tags` field.
    fn indexes() -> Vec<IndexSpec> {
        let by_tag = IndexSpec::each("by_tag", "tags").expect("each");
        vec![by_tag]
    }
}

/// Produce the `(lower, upper)` pair of `Dynamic` bounds for an
/// equality lookup against a non-unique index.
///
/// Both halves are the same `Dynamic::String(key)`. The `Collection`
/// range API encodes them through `encode_field`, and its
/// `widen_bounds_for_kind` step appends the `0xFF;8` id-suffix
/// widening internally, so `Included(key)..=Included(key)` matches
/// every entry whose user-key equals `key`, whatever its trailing
/// `Id`.
fn equality_range(key: &str) -> (Dynamic, Dynamic) {
    let bound = Dynamic::String(key.to_owned());
    (bound.clone(), bound)
}

#[test]
fn each_index_count_distinct_ids() {
    // Three docs, each tagged ["urgent", "review"], so the Each index
    // holds 2 entries per doc (6 in total). Restricted to the
    // "urgent" key, count_index_range sees 3 entries and
    // count_distinct_ids_in_range sees 3 distinct doc ids.
    let dir = TempDir::new().expect("tmp");
    let db_file = dir.path().join("each-count.obj");
    let db = Db::open(&db_file).expect("open");
    for i in 0..3 {
        let doc = Tagged {
            name: format!("doc{i}"),
            tags: vec!["urgent".to_owned(), "review".to_owned()],
        };
        let _ = db.insert(doc).expect("insert");
    }
    let (lower, upper) = equality_range("urgent");
    let urgent_only = (Bound::Included(lower), Bound::Included(upper));
    let (entries, distinct) = db
        .read_transaction(|tx| {
            let coll = tx.collection::<Tagged>()?;
            // Same range for both counters: a clone for the first
            // call, the original moved into the second.
            let entries = coll.count_index_range("by_tag", urgent_only.clone())?;
            let distinct = coll.count_distinct_ids_in_range("by_tag", urgent_only)?;
            Ok((entries, distinct))
        })
        .expect("read");
    assert_eq!(entries, 3, "3 entries with the 'urgent' tag");
    assert_eq!(distinct, 3, "3 distinct doc ids");
}

#[test]
fn each_index_count_distinct_dedups_within_doc() {
    // M7 de-duplicates identical tags inside one doc, so a doc tagged
    // ["urgent", "urgent"] contributes a single entry. Both counters
    // must therefore report 1.
    let dir = TempDir::new().expect("tmp");
    let db_file = dir.path().join("each-dedup.obj");
    let db = Db::open(&db_file).expect("open");
    let doc = Tagged {
        name: "dup".to_owned(),
        tags: vec!["urgent".to_owned(), "urgent".to_owned()],
    };
    let _ = db.insert(doc).expect("insert");
    let (lower, upper) = equality_range("urgent");
    let urgent_only = (Bound::Included(lower), Bound::Included(upper));
    let (entries, distinct) = db
        .read_transaction(|tx| {
            let coll = tx.collection::<Tagged>()?;
            // Same range for both counters: a clone for the first
            // call, the original moved into the second.
            let entries = coll.count_index_range("by_tag", urgent_only.clone())?;
            let distinct = coll.count_distinct_ids_in_range("by_tag", urgent_only)?;
            Ok((entries, distinct))
        })
        .expect("read");
    assert_eq!(entries, 1, "M7 de-dups duplicate tags within a doc");
    assert_eq!(distinct, 1);
}

#[test]
fn query_count_uses_distinct_path_for_each_index() {
    // Three docs, each tagged ["urgent", "review"]. The Each index
    // holds 6 entries (3 × "urgent" + 3 × "review") but only 3
    // distinct ids, since every doc emits two entries under different
    // element keys. Without the kind-dispatch, a full-range
    // Query::count on the Each index would return 6 (entry count);
    // with it, the result is 3 — the number `fetch` would emit,
    // because `index_range` de-duplicates per the M7 Each-emit
    // contract.
    let dir = TempDir::new().expect("tmp");
    let db_file = dir.path().join("each-querycount.obj");
    let db = Db::open(&db_file).expect("open");
    for i in 0..3 {
        let doc = Tagged {
            name: format!("doc{i}"),
            tags: vec!["urgent".to_owned(), "review".to_owned()],
        };
        let _ = db.insert(doc).expect("insert");
    }
    // Issue #74: the natural `Included("urgent")..=Included("urgent")`
    // form now widens internally to cover every id-suffixed key with
    // the same user portion. The kind-dispatch thus sees 3 entries
    // (the three docs tagged "urgent") and the distinct-id walk also
    // lands on 3.
    let urgent = Dynamic::from("urgent".to_owned());
    let counted = db
        .query::<Tagged>()
        .index_range("by_tag", urgent.clone()..=urgent.clone())
        .expect("index_range")
        .count()
        .expect("count");
    assert_eq!(
        counted, 3,
        "Each-kind count_fast routes to count_distinct_ids_in_range \
         (3 distinct docs, not 6 raw entries)",
    );
    // Cross-check against fetch: M7 de-dups Each-index emissions per
    // doc, so fetch also yields 3 docs and agrees with the count.
    let docs: Vec<Tagged> = db
        .query::<Tagged>()
        .index_range("by_tag", urgent.clone()..=urgent)
        .expect("index_range")
        .fetch()
        .expect("fetch");
    assert_eq!(docs.len() as u64, counted);
}

#[test]
fn distinct_count_exceeded_when_above_cap() {
    // Cheap smoke check only: pins the documented value of the
    // distinct-id cap so a change to the constant surfaces as a test
    // diff. Despite its name, this test does NOT drive the error.
    //
    // The cap is fixed in code at 100_000 and cannot be overridden
    // for one call, so actually firing `DistinctCountExceeded`
    // requires populating more than 100 000 docs. That slow path
    // lives in the `#[ignore]`d `distinct_count_exceeded` test; this
    // one always runs.
    assert_eq!(obj::MAX_DISTINCT_IDS, 100_000);
}

#[test]
#[ignore = "100k populate is slow; smoke-tested by distinct_count_exceeded_when_above_cap constant check"]
fn distinct_count_exceeded() {
    // Populate MAX_DISTINCT_IDS + 1 docs, each carrying the single
    // tag "urgent", so the Each index holds more than 100k distinct
    // ids under that key. count_distinct_ids_in_range must then
    // report Error::DistinctCountExceeded.
    let dir = TempDir::new().expect("tmp");
    let db_file = dir.path().join("each-over.obj");
    let db = Db::open(&db_file).expect("open");
    let total: u64 = (obj::MAX_DISTINCT_IDS as u64) + 1;
    let batch: u64 = 1_000;
    let mut inserted: u64 = 0;
    while inserted < total {
        // Clamp so the final batch never runs past `total`.
        let end = (inserted + batch).min(total);
        db.transaction(|tx| {
            let coll = tx.collection::<Tagged>()?;
            for i in inserted..end {
                let doc = Tagged {
                    name: format!("doc{i}"),
                    tags: vec!["urgent".to_owned()],
                };
                let _ = coll.insert(doc)?;
            }
            Ok(())
        })
        .expect("batch insert");
        inserted = end;
    }
    let (lower, upper) = equality_range("urgent");
    let urgent_only = (Bound::Included(lower), Bound::Included(upper));
    let err = db
        .read_transaction(|tx| {
            let coll = tx.collection::<Tagged>()?;
            coll.count_distinct_ids_in_range("by_tag", urgent_only)
        })
        .expect_err("must exceed the cap");
    let cap_fired = matches!(err, obj::Error::DistinctCountExceeded { limit: 100_000 });
    assert!(cap_fired, "expected DistinctCountExceeded, got {err:?}");
}

// --- M8 follow-up #73: sort_by error-surfacing + sort_by_bytes ------

#[test]
fn sort_by_with_embedded_nul_string_returns_error() {
    // The order-preserving encoder rejects a `Dynamic::String` whose
    // bytes contain `0x00` (NUL is the inter-segment terminator).
    // Issue #73 requires that to surface as Error::SortKeyEncode
    // instead of silently collapsing to an empty key.
    let (db, _dir) = fresh_db();
    seed_orders(&db, 5);
    // The NUL lives only in the key the extractor returns; the stored
    // docs themselves remain valid postcard.
    let poisoned = db
        .query::<Order>()
        .sort_by(|_doc| Dynamic::String("has\0nul".to_owned()));
    let err = poisoned
        .fetch()
        .expect_err("encode_field must reject embedded NUL");
    let is_encode_error = matches!(err, obj::Error::SortKeyEncode { .. });
    assert!(is_encode_error, "expected SortKeyEncode, got {err:?}");
}

#[test]
fn sort_by_bytes_works_with_arbitrary_bytes() {
    // `sort_by_bytes` skips encode_field altogether and leaves the
    // byte-order = sort-order contract to the caller. The key here is
    // the raw big-endian encoding of `placed_at`, whose byte order
    // coincides with natural `u64` order.
    let (db, _dir) = fresh_db();
    seed_reversed(&db, 100);
    let by_raw_bytes = db
        .query::<Order>()
        .sort_by_bytes(|o| o.placed_at.to_be_bytes().to_vec());
    let sorted: Vec<Order> = by_raw_bytes.fetch().expect("sort_by_bytes fetch");
    assert_eq!(sorted.len(), 100);
    // Every adjacent pair must be non-decreasing.
    for pair in sorted.windows(2) {
        let (prev, next) = (&pair[0], &pair[1]);
        assert!(
            prev.placed_at <= next.placed_at,
            "ascending sort violated at {:?}/{:?}",
            prev.placed_at,
            next.placed_at,
        );
    }
    // Length is pinned to 100 above, so first/last are indices 0/99.
    assert_eq!(sorted.first().map(|o| o.placed_at), Some(0));
    assert_eq!(sorted.last().map(|o| o.placed_at), Some(99));
}

#[test]
fn sort_by_bytes_accepts_bytes_with_nul_unlike_sort_by() {
    // The reason sort_by_bytes exists: keys containing 0x00, which
    // encode_field would reject, are accepted here because the caller
    // owns the byte-order = sort-order invariant.
    let (db, _dir) = fresh_db();
    seed_orders(&db, 5);
    // Big-endian bytes of a small u64 are mostly 0x00, yet remain
    // totally ordered — a perfectly valid sort key.
    let by_raw_bytes = db
        .query::<Order>()
        .sort_by_bytes(|o| o.customer_id.to_be_bytes().to_vec());
    let sorted: Vec<Order> = by_raw_bytes
        .fetch()
        .expect("sort_by_bytes with NUL-bearing keys");
    assert_eq!(sorted.len(), 5);
}

/// Spec test for issue #66's "200k items survive filtering with
/// default limit; `.sort_buffer_limit(200_001)` lets it through".
///
/// Docs are inserted in 1 000-doc batches (the M6 #52 cleanup
/// pattern) so the WAL stays inside its 64 MiB default. The populate
/// takes tens of seconds, so the test is `#[ignore]`d by default:
/// `cargo test --workspace` skips it, while the CI's
/// `--include-ignored` run exercises the full contract.
#[test]
#[ignore = "200k populate is slow; default-cap behaviour also covered by query_sort_buffer_default_constant_is_one_hundred_thousand"]
fn query_sort_buffer_exceeded_default_fires_at_200k() {
    let (db, _dir) = fresh_db();
    let total: u64 = 200_001;
    let batch: u64 = 1_000;
    let mut inserted: u64 = 0;
    while inserted < total {
        // Clamp so the final (single-doc) batch stops at `total`.
        let end = (inserted + batch).min(total);
        db.transaction(|tx| {
            let coll = tx.collection::<Order>()?;
            for i in inserted..end {
                let order = Order {
                    customer_id: i,
                    status: "pending".to_owned(),
                    placed_at: i,
                };
                let _ = coll.insert(order)?;
            }
            Ok(())
        })
        .expect("batch insert");
        inserted = end;
    }

    // No explicit limit: the default cap applies and must overflow.
    let default_capped = db.query::<Order>().sort_by(|o| Dynamic::U64(o.placed_at));
    let err = default_capped
        .fetch()
        .expect_err("default buffer must overflow at >100k");
    let overflowed = matches!(err, obj::Error::SortBufferExceeded { limit: 100_000 });
    assert!(
        overflowed,
        "expected SortBufferExceeded{{ limit: 100_000 }}, got {err:?}",
    );

    // Raising the cap to the doc count (200_001) lets it through.
    let raised = db
        .query::<Order>()
        .sort_by(|o| Dynamic::U64(o.placed_at))
        .sort_buffer_limit(200_001);
    let sorted: Vec<Order> = raised.fetch().expect("raised cap fetch");
    assert_eq!(sorted.len() as u64, total);
    assert_eq!(sorted.first().map(|o| o.placed_at), Some(0));
    assert_eq!(sorted.last().map(|o| o.placed_at), Some(total - 1));
}

// --- Issue #74: bound-widening for non-Unique indexes --------------

/// Minimal doc with a Standard (non-unique) `by_status` index, used
/// by the issue #74 bound-widening tests.
///
/// `Tagged` doc lives above. A doc with a Standard `by_status` index
/// exists on `Customer` in `index_maintenance.rs`; here we declare a
/// fresh shape so this test file stays self-contained.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
struct Ticket {
    /// Status field carries the indexed key. Multiple tickets may
    /// share a status (Standard index, non-unique).
    status: String,
}

impl Document for Ticket {
    const COLLECTION: &'static str = "tickets";
    const VERSION: u32 = 1;

    /// Declares the single `Standard` index, named `by_status`, over
    /// the `status` field.
    fn indexes() -> Vec<IndexSpec> {
        let by_status = IndexSpec::standard("by_status", "status").expect("standard");
        vec![by_status]
    }
}

#[test]
fn standard_index_inclusive_equality_matches_all_entries() {
    // Issue #74 repro for the Standard kind: three docs, all with
    // status="urgent". The natural
    // `Included("urgent")..=Included("urgent")` range must match all
    // three even though the B-tree keys carry the M7 #60 trailing
    // id_be8 suffix.
    let (db, _dir) = fresh_db();
    for _ in 0..3 {
        let ticket = Ticket {
            status: "urgent".to_owned(),
        };
        let _ = db.insert(ticket).expect("insert");
    }
    let urgent = Dynamic::from("urgent".to_owned());
    let hits: Vec<Ticket> = db
        .query::<Ticket>()
        .index_range("by_status", urgent.clone()..=urgent)
        .expect("index_range")
        .fetch()
        .expect("fetch");
    assert_eq!(hits.len(), 3, "all 3 'urgent' docs must match");
    for ticket in &hits {
        assert!(ticket.status == "urgent");
    }
}

#[test]
fn each_index_inclusive_equality_matches_all_entries() {
    // Issue #74 repro for Each: 2 docs each tagged ["urgent"]. The
    // natural `Included("urgent")..=Included("urgent")` form must
    // match both docs.
    //
    // Uses the shared `fresh_db` fixture like the Standard sibling
    // test instead of hand-rolling a temp dir.
    let (db, _dir) = fresh_db();
    for i in 0..2 {
        let _ = db
            .insert(Tagged {
                name: format!("doc{i}"),
                tags: vec!["urgent".to_owned()],
            })
            .expect("insert");
    }
    let hits: Vec<Tagged> = db
        .query::<Tagged>()
        .index_range(
            "by_tag",
            Dynamic::from("urgent".to_owned())..=Dynamic::from("urgent".to_owned()),
        )
        .expect("index_range")
        .fetch()
        .expect("fetch");
    assert_eq!(hits.len(), 2, "both 'urgent'-tagged docs must match");
    // Check content as well as count, mirroring the Standard test:
    // every hit carries the probed tag...
    assert!(
        hits.iter()
            .all(|t| t.tags.iter().any(|tag| tag.as_str() == "urgent")),
        "every hit must carry the 'urgent' tag",
    );
    // ...and the two hits are the two distinct seeded docs, not one
    // doc returned twice.
    let mut names: Vec<&str> = hits.iter().map(|t| t.name.as_str()).collect();
    names.sort_unstable();
    assert_eq!(names, ["doc0", "doc1"], "each seeded doc appears exactly once");
}

#[test]
fn excluded_lower_skips_user_key_entries_inclusive_upper_matches() {
    // Bound-widening contract for `Excluded("a")..=Included("b")`:
    // every status="b" entry is returned and no status="a" entry is.
    // The excluded lower bound has to step over every
    // `encode("a") || id_be8` key, and the included upper bound has
    // to cover every `encode("b") || id_be8` key.
    let (db, _dir) = fresh_db();

    // Inserts `copies` tickets with the given status; `msg` is the
    // panic label used if an insert fails.
    let seed = |status: &str, copies: usize, msg: &str| {
        for _ in 0..copies {
            let ticket = Ticket {
                status: status.to_owned(),
            };
            let _ = db.insert(ticket).expect(msg);
        }
    };
    seed("a", 2, "a insert");
    seed("b", 3, "b insert");

    let window = (
        Bound::Excluded(Dynamic::from("a".to_owned())),
        Bound::Included(Dynamic::from("b".to_owned())),
    );
    let query = db
        .query::<Ticket>()
        .index_range("by_status", window)
        .expect("index_range");
    let hits: Vec<Ticket> = query.fetch().expect("fetch");

    assert_eq!(hits.len(), 3, "only the 3 'b' docs match");
    assert!(hits.iter().all(|t| t.status == "b"));
}

#[test]
fn unique_inclusive_equality_still_matches_single_entry() {
    // Regression guard: the Unique branch of the helper must NOT
    // widen — `Unique` keys carry no id_be8 suffix, so widening
    // would over-match. `by_email` on `Customer` is the canonical
    // Unique fixture; we replicate the shape locally to stay in
    // `query_basic.rs`.
    #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
    struct UniqueDoc {
        email: String,
    }
    impl Document for UniqueDoc {
        const COLLECTION: &'static str = "uniq_docs_74";
        const VERSION: u32 = 1;
        fn indexes() -> Vec<IndexSpec> {
            vec![IndexSpec::unique("by_email", "email").expect("unique")]
        }
    }
    let dir = TempDir::new().expect("tmp");
    let path = dir.path().join("unique-eq74.obj");
    let db = Db::open(&path).expect("open");
    // With only the probed doc stored, an over-matching range could
    // never be observed. Seed neighbours as well: one whose email
    // extends the probed value (the shape a suffix-style widening
    // would presumably sweep up) and one unrelated key.
    for email in ["ada@example.com", "ada@example.com.au", "bob@example.com"] {
        let _ = db
            .insert(UniqueDoc {
                email: email.to_owned(),
            })
            .expect("insert");
    }
    let hits: Vec<UniqueDoc> = db
        .query::<UniqueDoc>()
        .index_range(
            "by_email",
            Dynamic::from("ada@example.com".to_owned())
                ..=Dynamic::from("ada@example.com".to_owned()),
        )
        .expect("index_range")
        .fetch()
        .expect("fetch");
    assert_eq!(hits.len(), 1, "Unique single-key inclusive range matches");
    assert_eq!(hits[0].email, "ada@example.com");
}

#[test]
fn composite_inclusive_equality_matches_all_entries() {
    // Composite: ("k", 1), ("k", 2), ("k", 3). A
    // `Included(("k", 1))..=Included(("k", 3))` range over the
    // composite index must match all 3 entries even though each
    // composite B-tree key carries a trailing id_be8 suffix.
    //
    // The `Query` layer's `index_range` only takes a single
    // `Dynamic`, so this test drives the Collection-layer
    // `index_range` inside a read transaction and passes
    // `Dynamic::Seq` tuple bounds.
    #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
    struct Pair {
        a: String,
        b: u64,
    }
    impl Document for Pair {
        const COLLECTION: &'static str = "pairs_74";
        const VERSION: u32 = 1;
        fn indexes() -> Vec<IndexSpec> {
            vec![IndexSpec::composite("by_ab", &["a", "b"]).expect("composite")]
        }
    }
    let dir = TempDir::new().expect("tmp");
    let path = dir.path().join("composite-eq74.obj");
    let db = Db::open(&path).expect("open");
    for b in 1u64..=3 {
        let _ = db
            .insert(Pair {
                a: "k".to_owned(),
                b,
            })
            .expect("insert");
    }
    // Composite tuple bounds ("k", 1) and ("k", 3). A `Dynamic::Seq`
    // bound is encoded as a composite key by the Collection range
    // API (the same shape `encode_index_key` produces); the widening
    // helper adds the trailing id_be8-suffix cover internally.
    //
    // Each bound is built where it is consumed, so no clone is
    // needed to hand it to the range call.
    let tuple_bound =
        |b: u64| Dynamic::Seq(vec![Dynamic::from("k".to_owned()), Dynamic::U64(b)]);
    let pairs: Vec<(Vec<u8>, Pair)> = db
        .read_transaction(|tx| {
            tx.collection::<Pair>()?
                .index_range(
                    "by_ab",
                    (
                        Bound::Included(tuple_bound(1)),
                        Bound::Included(tuple_bound(3)),
                    ),
                )?
                .collect()
        })
        .expect("composite range");
    assert_eq!(pairs.len(), 3, "all 3 composite entries must match");
    // All matches share a="k"; values for b are 1, 2, 3 in some order.
    let mut bs: Vec<u64> = pairs.iter().map(|(_k, p)| p.b).collect();
    bs.sort_unstable();
    assert_eq!(bs, vec![1, 2, 3]);
    assert!(pairs.iter().all(|(_k, p)| p.a == "k"));
}

// --- Phase 7A (M14 #14): Collection::iter_range streaming iterator ----

#[test]
fn iter_range_yields_same_set_as_index_range() {
    let (db, _dir) = fresh_db();
    seed_orders(&db, 50);
    // Both calls probe the same half-open window [10, 40). Building
    // it in one place keeps the two sides from drifting apart.
    let window = || {
        (
            Bound::Included(Dynamic::U64(10)),
            Bound::Excluded(Dynamic::U64(40)),
        )
    };
    // index_range materialises eagerly; iter_range streams the same
    // entries. Their (key, doc) outputs must be equal pairwise.
    let want: Vec<(Vec<u8>, Order)> = db
        .read_transaction(|tx| {
            tx.collection::<Order>()?
                .index_range("placed_at", window())?
                .collect()
        })
        .expect("index_range eager");
    // Guard against a vacuous pass: `seed_orders` writes
    // placed_at = 0..50, so the window must hold exactly 30 rows.
    assert_eq!(want.len(), 30, "window [10, 40) must cover 30 seeded orders");
    let got: Vec<(Vec<u8>, Order)> = db
        .read_transaction(|tx| {
            let coll = tx.collection::<Order>()?;
            let iter = coll.iter_range("placed_at", window())?;
            iter.collect::<Result<Vec<_>, _>>()
        })
        .expect("iter_range streaming");
    assert_eq!(got.len(), want.len(), "row counts must match");
    assert_eq!(got, want, "iter_range must yield index_range's set");
}

#[test]
fn iter_range_refills_across_batch_boundary() {
    // 1_000 docs is well past ITER_INDEX_RANGE_BATCH = 256, so an
    // unbounded scan has to refill its chunk several times before
    // it is exhausted.
    let (db, _dir) = fresh_db();
    seed_orders(&db, 1_000);
    let count = db
        .read_transaction(|tx| {
            let coll = tx.collection::<Order>()?;
            let mut rows = coll.iter_range::<(Bound<Dynamic>, Bound<Dynamic>)>(
                "placed_at",
                (Bound::Unbounded, Bound::Unbounded),
            )?;
            // Count successful steps; the first failing step aborts
            // the fold and becomes the transaction's error.
            rows.try_fold(0usize, |seen, step| step.map(|_| seen + 1))
        })
        .expect("iter_range full scan");
    assert_eq!(count, 1_000, "every doc visible via iter_range");
}

#[test]
fn iter_range_empty_window_yields_nothing() {
    let (db, _dir) = fresh_db();
    seed_orders(&db, 20);
    // Seeded placed_at values are 0..20, so the window [1000, 2000)
    // selects no entries.
    let window = (
        Bound::Included(Dynamic::U64(1_000)),
        Bound::Excluded(Dynamic::U64(2_000)),
    );
    let count = db
        .read_transaction(|tx| {
            let coll = tx.collection::<Order>()?;
            let mut rows = coll.iter_range("placed_at", window)?;
            // Tally yielded rows, propagating the first step error.
            rows.try_fold(0usize, |seen, step| step.map(|_| seen + 1))
        })
        .expect("iter_range empty");
    assert_eq!(count, 0);
}

#[test]
fn iter_range_each_kind_dedups_across_refills() {
    // Tagged docs use an `Each` index — a single doc may surface
    // multiple times under different tag keys. iter_range must
    // dedup by doc id even when the dedup set spans multiple chunk
    // refills.
    let (db, _dir) = fresh_db();
    // 300 docs each with 2 tags (600 index entries — > 256, so the
    // iterator's dedup set must persist across refills).
    let n: u64 = 300;
    db.transaction(|tx| {
        let coll = tx.collection::<Tagged>()?;
        for i in 0..n {
            let _ = coll.insert(Tagged {
                name: format!("d-{i}"),
                tags: vec!["urgent".to_owned(), format!("batch-{}", i % 3)],
            })?;
        }
        Ok(())
    })
    .expect("seed tagged");
    let usize_n = usize::try_from(n).expect("u64 doc count fits in usize");

    // Checks that `docs` holds each of the `usize_n` seeded docs
    // exactly once.
    let assert_each_doc_once = |docs: Vec<Tagged>, scan: &str| {
        assert_eq!(docs.len(), usize_n, "{scan}: every doc yielded once");
        let names: std::collections::HashSet<String> =
            docs.into_iter().map(|d| d.name).collect();
        assert_eq!(names.len(), usize_n, "{scan}: no doc emitted twice");
    };

    // Pass 1: equality window on "urgent". 300 entries, one per
    // doc, so this covers chunk refills over an `Each` index.
    let (lo, hi) = equality_range("urgent");
    let urgent_docs: Vec<Tagged> = db
        .read_transaction(|tx| {
            let coll = tx.collection::<Tagged>()?;
            let iter = coll.iter_range("by_tag", (Bound::Included(lo), Bound::Included(hi)))?;
            iter.map(|s| s.map(|(_k, doc)| doc))
                .collect::<Result<Vec<_>, _>>()
        })
        .expect("iter_range Each dedup");
    assert_each_doc_once(urgent_docs, "urgent window");

    // Pass 2: whole index. Every doc now surfaces under both of its
    // tags (600 entries), so it can only come back once if the dedup
    // set is kept across chunk refills. Pass 1 alone never meets a
    // repeated doc id.
    let all_docs: Vec<Tagged> = db
        .read_transaction(|tx| {
            let coll = tx.collection::<Tagged>()?;
            let iter = coll.iter_range::<(Bound<Dynamic>, Bound<Dynamic>)>(
                "by_tag",
                (Bound::Unbounded, Bound::Unbounded),
            )?;
            iter.map(|s| s.map(|(_k, doc)| doc))
                .collect::<Result<Vec<_>, _>>()
        })
        .expect("iter_range Each dedup full scan");
    assert_each_doc_once(all_docs, "full index scan");
}

#[test]
fn iter_range_lazy_mode_matches_streaming_mode() {
    // Lazy-mode handle (Db::collection(...) — no enclosing txn);
    // iter_range falls back to eager materialisation but yields the
    // same set as the in-txn streaming path.
    let (db, _dir) = fresh_db();
    seed_orders(&db, 30);
    // Both paths probe the same half-open window [5, 25); build it
    // in one place so the two sides cannot drift apart.
    let window = || {
        (
            Bound::Included(Dynamic::U64(5)),
            Bound::Excluded(Dynamic::U64(25)),
        )
    };
    let coll = db.collection::<Order>("orders");
    let lazy_docs: Vec<Order> = coll
        .iter_range("placed_at", window())
        .expect("iter_range lazy")
        .map(|s| s.expect("step").1)
        .collect();
    // Guard against a vacuous pass: `seed_orders` writes
    // placed_at = 0..30, so the window must hold exactly 20 docs.
    assert_eq!(lazy_docs.len(), 20, "window [5, 25) must cover 20 seeded orders");
    let streaming_docs: Vec<Order> = db
        .read_transaction(|tx| {
            tx.collection::<Order>()?
                .iter_range("placed_at", window())?
                .map(|s| s.map(|(_k, d)| d))
                .collect::<Result<Vec<_>, _>>()
        })
        .expect("iter_range streaming");
    assert_eq!(lazy_docs, streaming_docs, "lazy and streaming sets agree");
}