Skip to main content

bison_db/
store.rs

1//! The single-file document store: [`Db`].
2//!
3//! `bison-db` persists documents to one append-only file. Every write — insert,
4//! overwrite, or delete — appends a self-describing record to the tail; the file
5//! is never edited in place. An in-memory index maps each live document id to
6//! the byte offset of its most recent record, so a read is one hash lookup and
7//! one positional read. This log-structured design makes writes sequential
8//! (the pattern disks and SSDs serve fastest) and keeps a crash from corrupting
9//! data already on disk: a half-written record at the tail is detected by its
10//! length and checksum and dropped on the next open.
11//!
12//! ## Record framing
13//!
14//! The file opens with a fixed header (magic plus a format version), then a run
15//! of records. Each record is an 8-byte frame (`u32` payload length, `u32`
16//! CRC-32C of the payload) followed by the payload itself: a one-byte operation
17//! tag, the 8-byte document id, and — for an insert or overwrite — the encoded
18//! document body. A delete writes a tombstone with no body.
19//!
20//! ## Durability
21//!
22//! A record reaches the OS page cache as soon as it is written, so it is visible
23//! to later reads in the same process immediately. When it becomes durable
24//! against a power loss is governed by the store's [`SyncPolicy`]:
25//!
26//! - [`SyncPolicy::Always`] forces an `fsync` after every write, so each
27//!   operation is durable the moment it returns.
28//! - [`SyncPolicy::Manual`] (the default) syncs only on [`Db::flush`] and once,
29//!   best-effort, on drop. It is faster, and writes remain crash-*safe* — a torn
30//!   write is never misread — but the most recent unsynced writes can be lost on
31//!   power loss.
32//!
33//! Either way the on-disk invariant holds: a crash never tears a record that was
34//! already durable. On a newly created file, the parent directory is `fsync`ed
35//! so the file's existence is itself durable.
36
37use std::collections::HashMap;
38use std::fmt;
39use std::fs::{File, OpenOptions};
40use std::ops::RangeBounds;
41use std::path::{Path, PathBuf};
42
43use crate::codec::{crc32c, decode_document, encode_document_into};
44use crate::error::{Error, Result};
45use crate::index::{SecondaryIndex, in_bounds, total_cmp_value};
46use crate::sys::{read_exact_at, write_all_at};
47use crate::value::{Document, Value};
48
/// Upper bound, in bytes, on a single record payload, applied on both the
/// write path and the read path.
///
/// Writing a document whose encoding exceeds this limit fails with
/// [`Error::ValueTooLarge`]. When reading, a framed length above the limit is
/// treated as corruption, which caps the allocation the recovery path can be
/// asked to make by a damaged file.
pub const MAX_RECORD_BYTES: usize = 64 << 20; // 64 MiB

/// Magic bytes that open every store file. The final digit follows the header
/// layout and is separate from the format version stored after it.
const HEADER_MAGIC: [u8; 8] = *b"BISONDB1";

/// Version of the on-disk format. It has been frozen at `1` since v0.4.0: the
/// layout in `docs/FORMAT.md` is stable, and every later release reads files
/// written by 0.2.0 onward. It changes only for an incompatible record-layout
/// change, which would be a major-version event.
const FORMAT_VERSION: u16 = 1;

/// Byte length of the file header.
const HEADER_LEN: u64 = 8 /* magic */ + 2 /* u16 version */ + 6 /* reserved */;

/// Byte length of a record frame.
const FRAME_LEN: usize = 4 /* u32 length */ + 4 /* u32 checksum */;

/// The shortest payload that is legal: one op-tag byte and an 8-byte id with
/// no body after them, which is exactly what a delete tombstone looks like.
const MIN_PAYLOAD: usize = 1 + 8;

/// Op tag marking an insert or overwrite; a document body follows the id.
const OP_PUT: u8 = 1;

/// Op tag marking a delete; the payload holds the tag and the id only.
const OP_DELETE: u8 = 2;
82
83/// A document's primary key within a [`Db`].
84///
85/// Ids are assigned by [`Db::insert`] as a dense, monotonically increasing
86/// sequence starting at 1; `0` is never assigned and can be used as a sentinel.
87/// The id is stable for the life of the document and survives reopening the
88/// file. Reconstruct one with [`DocId::from`] when you have stored it elsewhere.
89///
90/// # Examples
91///
92/// ```
93/// use bison_db::DocId;
94/// let id = DocId::from(7);
95/// assert_eq!(id.get(), 7);
96/// assert_eq!(id.to_string(), "7");
97/// ```
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct DocId(u64);

impl DocId {
    /// Unwraps the id into its raw `u64`.
    ///
    /// # Examples
    ///
    /// ```
    /// use bison_db::DocId;
    /// assert_eq!(DocId::from(42).get(), 42);
    /// ```
    #[inline]
    #[must_use]
    pub const fn get(self) -> u64 {
        let Self(raw) = self;
        raw
    }
}

/// Wraps a raw `u64` as a [`DocId`] without any validation.
impl From<u64> for DocId {
    #[inline]
    fn from(raw: u64) -> Self {
        Self(raw)
    }
}

/// Extracts the raw `u64` from a [`DocId`].
impl From<DocId> for u64 {
    #[inline]
    fn from(id: DocId) -> Self {
        id.get()
    }
}

/// Renders the id as its plain decimal number.
impl fmt::Display for DocId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Go through `format_args!` rather than delegating to `u64`'s
        // `Display`, so the caller's width/fill flags are not applied.
        f.write_fmt(format_args!("{}", self.0))
    }
}
136
137/// Where a live document's body sits in the file.
#[derive(Copy, Clone)]
struct BodyLoc {
    /// File offset, in bytes, at which the encoded document body starts.
    offset: u64,
    /// Size of the encoded document body, in bytes.
    len: u32,
}
145
146/// A point-in-time summary of a store's size and contents.
147///
148/// Returned by [`Db::stats`]. The gap between `file_bytes` and `live_bytes`
149/// (plus framing) is space held by superseded and deleted records — the slack a
150/// future compaction step will reclaim.
151///
152/// # Examples
153///
154/// ```no_run
155/// # fn main() -> bison_db::Result<()> {
156/// let db = bison_db::Db::open("data.bison")?;
157/// let stats = db.stats();
158/// println!("{} live documents in {} bytes", stats.live_documents, stats.file_bytes);
159/// # Ok(())
160/// # }
161/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Stats {
    /// How many documents can currently be read.
    pub live_documents: usize,
    /// Size of the whole store file, in bytes.
    pub file_bytes: u64,
    /// Sum of the body sizes of all live documents, in bytes; record framing
    /// is not counted.
    pub live_bytes: u64,
}
171
172/// When a write is made durable on disk.
173///
174/// bison-db never holds writes in a userspace buffer — every write reaches the
175/// operating system immediately and is visible to later reads. This policy
176/// controls only when the store forces those bytes through the OS cache to the
177/// physical device with `fsync`, which is what protects them from a power loss.
178///
179/// # Examples
180///
181/// ```
182/// # fn main() -> bison_db::Result<()> {
183/// use bison_db::{DbOptions, SyncPolicy};
184/// # let path = std::env::temp_dir().join("bison_db_syncpolicy_doc.bison");
185/// # let _ = std::fs::remove_file(&path);
186/// // Durable per write, at the cost of an fsync on every insert/update/delete.
187/// let db = DbOptions::new().sync(SyncPolicy::Always).open(&path)?;
188/// # drop(db);
189/// # let _ = std::fs::remove_file(&path);
190/// # Ok(())
191/// # }
192/// ```
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
pub enum SyncPolicy {
    /// Issue an `fsync` after each write, before the call returns. Every
    /// insert, update, and delete is therefore durable once it completes; the
    /// price is one device sync per operation.
    Always,
    /// Issue an `fsync` only on [`Db::flush`], plus one best-effort sync when
    /// the store is dropped. Writes stay crash-*safe* (a torn write is never
    /// misread), but a power loss can lose the latest writes that were not yet
    /// synced. This is the default policy and the fastest one.
    #[default]
    Manual,
}
206
207/// Options for opening a [`Db`], built fluently and finished with
208/// [`open`](DbOptions::open).
209///
210/// Use this when the default [`Db::open`] is not enough — currently, to choose a
211/// [`SyncPolicy`]. The set of options is intentionally small and will only grow
212/// additively.
213///
214/// # Examples
215///
216/// ```
217/// # fn main() -> bison_db::Result<()> {
218/// use bison_db::{DbOptions, SyncPolicy};
219/// # let path = std::env::temp_dir().join("bison_db_dboptions_doc.bison");
220/// # let _ = std::fs::remove_file(&path);
221/// let db = DbOptions::new().sync(SyncPolicy::Always).open(&path)?;
222/// assert_eq!(db.sync_policy(), SyncPolicy::Always);
223/// # drop(db);
224/// # let _ = std::fs::remove_file(&path);
225/// # Ok(())
226/// # }
227/// ```
228#[derive(Clone, Copy, Debug, Default)]
229pub struct DbOptions {
230    sync: SyncPolicy,
231}
232
233impl DbOptions {
234    /// Creates options with the defaults ([`SyncPolicy::Manual`]).
235    ///
236    /// # Examples
237    ///
238    /// ```
239    /// use bison_db::{DbOptions, SyncPolicy};
240    /// assert_eq!(DbOptions::new().build_sync_policy(), SyncPolicy::Manual);
241    /// ```
242    #[must_use]
243    pub fn new() -> Self {
244        DbOptions::default()
245    }
246
247    /// Sets the [`SyncPolicy`] for the store.
248    ///
249    /// # Examples
250    ///
251    /// ```
252    /// use bison_db::{DbOptions, SyncPolicy};
253    /// let opts = DbOptions::new().sync(SyncPolicy::Always);
254    /// assert_eq!(opts.build_sync_policy(), SyncPolicy::Always);
255    /// ```
256    #[must_use]
257    pub fn sync(mut self, policy: SyncPolicy) -> Self {
258        self.sync = policy;
259        self
260    }
261
262    /// Returns the [`SyncPolicy`] these options currently carry.
263    ///
264    /// # Examples
265    ///
266    /// ```
267    /// use bison_db::{DbOptions, SyncPolicy};
268    /// assert_eq!(DbOptions::new().build_sync_policy(), SyncPolicy::Manual);
269    /// ```
270    #[must_use]
271    pub fn build_sync_policy(&self) -> SyncPolicy {
272        self.sync
273    }
274
275    /// Opens (or creates) the store at `path` with these options.
276    ///
277    /// Equivalent to [`Db::open`] when the options are the defaults.
278    ///
279    /// # Errors
280    ///
281    /// Same as [`Db::open`].
282    ///
283    /// # Examples
284    ///
285    /// ```
286    /// # fn main() -> bison_db::Result<()> {
287    /// use bison_db::{DbOptions, SyncPolicy};
288    /// # let path = std::env::temp_dir().join("bison_db_dboptions_open_doc.bison");
289    /// # let _ = std::fs::remove_file(&path);
290    /// let db = DbOptions::new().sync(SyncPolicy::Always).open(&path)?;
291    /// # drop(db);
292    /// # let _ = std::fs::remove_file(&path);
293    /// # Ok(())
294    /// # }
295    /// ```
296    pub fn open<P: AsRef<Path>>(self, path: P) -> Result<Db> {
297        Db::open_inner(path.as_ref().to_path_buf(), self.sync)
298    }
299}
300
301/// An embedded document store backed by a single append-only file.
302///
303/// Open one with [`Db::open`], then [`insert`](Db::insert),
304/// [`get`](Db::get), [`update`](Db::update), and [`delete`](Db::delete)
305/// documents by id. Reads take `&self` and writes take `&mut self`, so the
306/// compiler enforces single-writer access; share a `Db` across threads by
307/// placing it behind your own lock. Call [`flush`](Db::flush) to make recent
308/// writes durable.
309///
310/// # Examples
311///
312/// ```
313/// # fn main() -> bison_db::Result<()> {
314/// use bison_db::{Db, Document};
315///
316/// let dir = std::env::temp_dir().join("bison_db_doc_example");
317/// let _ = std::fs::remove_file(&dir);
318/// let mut db = Db::open(&dir)?;
319///
320/// let mut user = Document::new();
321/// user.set("name", "grace").set("born", 1906_i64);
322/// let id = db.insert(user)?;
323///
324/// let fetched = db.get(id)?.expect("just inserted");
325/// assert_eq!(fetched.get("name").and_then(|v| v.as_str()), Some("grace"));
326///
327/// db.flush()?;
328/// # let _ = std::fs::remove_file(&dir);
329/// # Ok(())
330/// # }
331/// ```
332pub struct Db {
333    /// The open store file, used for both positional reads and tail appends.
334    file: File,
335    /// Path the store was opened from, returned by [`Db::path`].
336    path: PathBuf,
337    /// Live document id to the location of its most recent body.
338    index: HashMap<u64, BodyLoc>,
339    /// Offset at which the next record will be appended.
340    tail: u64,
341    /// Id that the next [`Db::insert`] will assign.
342    next_id: u64,
343    /// Reusable buffer for framing a record, so writes do not allocate.
344    scratch: Vec<u8>,
345    /// Secondary indexes by field name, built on demand and maintained on every
346    /// write. Not persisted: rebuilt via [`Db::create_index`] each session.
347    indexes: HashMap<String, SecondaryIndex>,
348    /// When to force writes to disk with `fsync`.
349    sync: SyncPolicy,
350}
351
352impl Db {
353    /// Opens the store at `path`, creating an empty one if the file does not
354    /// exist, and replaying any existing records to rebuild the index.
355    ///
356    /// On open the whole log is scanned: each record's checksum is verified and
357    /// the in-memory index is reconstructed from the surviving inserts and
358    /// deletes. A record left half-written by a crash — detectable because it
359    /// runs past the end of the file or fails its checksum at the tail — is
360    /// truncated away, restoring the file to its last consistent state. A
361    /// checksum failure on a record that is *not* at the tail is reported as
362    /// [`Error::Corrupt`], because that indicates in-place damage rather than a
363    /// torn write.
364    ///
365    /// Uses [`SyncPolicy::Manual`]; for a different policy, open through
366    /// [`DbOptions`].
367    ///
368    /// # Errors
369    ///
370    /// Returns [`Error::Io`] if the file cannot be opened or read,
371    /// [`Error::BadMagic`] if an existing file is not a bison-db store,
372    /// [`Error::UnsupportedVersion`] if it was written by a newer format, and
373    /// [`Error::Corrupt`] if a non-tail record fails verification.
374    ///
375    /// # Examples
376    ///
377    /// ```
378    /// # fn main() -> bison_db::Result<()> {
379    /// let path = std::env::temp_dir().join("bison_db_open_example.bison");
380    /// let _ = std::fs::remove_file(&path);
381    /// let db = bison_db::Db::open(&path)?;
382    /// assert!(db.is_empty());
383    /// # let _ = std::fs::remove_file(&path);
384    /// # Ok(())
385    /// # }
386    /// ```
387    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
388        DbOptions::new().open(path)
389    }
390
391    /// Opens (or creates) the store at `path` with the given [`DbOptions`].
392    ///
393    /// A shorthand for [`DbOptions::open`]; see [`Db::open`] for the open and
394    /// recovery contract.
395    ///
396    /// # Errors
397    ///
398    /// Same as [`Db::open`].
399    ///
400    /// # Examples
401    ///
402    /// ```
403    /// # fn main() -> bison_db::Result<()> {
404    /// use bison_db::{Db, DbOptions, SyncPolicy};
405    /// # let path = std::env::temp_dir().join("bison_db_open_with_example.bison");
406    /// # let _ = std::fs::remove_file(&path);
407    /// let db = Db::open_with(&path, DbOptions::new().sync(SyncPolicy::Always))?;
408    /// assert_eq!(db.sync_policy(), SyncPolicy::Always);
409    /// # drop(db);
410    /// # let _ = std::fs::remove_file(&path);
411    /// # Ok(())
412    /// # }
413    /// ```
414    pub fn open_with<P: AsRef<Path>>(path: P, options: DbOptions) -> Result<Self> {
415        options.open(path)
416    }
417
418    /// The shared open path used by [`Db::open`] and [`DbOptions::open`].
419    fn open_inner(path: PathBuf, sync: SyncPolicy) -> Result<Self> {
420        let file = OpenOptions::new()
421            .read(true)
422            .write(true)
423            .create(true)
424            .truncate(false)
425            .open(&path)?;
426        let file_len = file.metadata()?.len();
427
428        let mut db = Db {
429            file,
430            path,
431            index: HashMap::new(),
432            tail: HEADER_LEN,
433            next_id: 1,
434            scratch: Vec::with_capacity(256),
435            indexes: HashMap::new(),
436            sync,
437        };
438
439        if file_len == 0 {
440            db.write_header()?;
441            // Make the newly created file's directory entry durable, so the file
442            // is guaranteed to exist after a crash that follows creation.
443            sync_parent_dir(&db.path)?;
444        } else {
445            db.verify_header(file_len)?;
446            db.replay(file_len)?;
447        }
448        Ok(db)
449    }
450
451    /// Returns the store's [`SyncPolicy`].
452    ///
453    /// # Examples
454    ///
455    /// ```
456    /// # fn main() -> bison_db::Result<()> {
457    /// use bison_db::{Db, SyncPolicy};
458    /// # let path = std::env::temp_dir().join("bison_db_syncpolicy_getter.bison");
459    /// # let _ = std::fs::remove_file(&path);
460    /// let db = Db::open(&path)?;
461    /// assert_eq!(db.sync_policy(), SyncPolicy::Manual);
462    /// # drop(db);
463    /// # let _ = std::fs::remove_file(&path);
464    /// # Ok(())
465    /// # }
466    /// ```
467    #[must_use]
468    pub fn sync_policy(&self) -> SyncPolicy {
469        self.sync
470    }
471
472    /// Inserts `doc`, assigning and returning a fresh [`DocId`].
473    ///
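    /// The document is appended to the log and indexed; it is readable
    /// immediately. Under [`SyncPolicy::Manual`] it becomes durable after the
    /// next [`flush`](Db::flush); under [`SyncPolicy::Always`] it is durable as
    /// soon as this call returns.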
476    ///
477    /// # Errors
478    ///
479    /// Returns [`Error::ValueTooLarge`] if the encoded document exceeds
480    /// [`MAX_RECORD_BYTES`], or [`Error::Io`] if the append fails.
481    ///
482    /// # Examples
483    ///
484    /// ```
485    /// # fn main() -> bison_db::Result<()> {
486    /// # let path = std::env::temp_dir().join("bison_db_insert_example.bison");
487    /// # let _ = std::fs::remove_file(&path);
488    /// use bison_db::{Db, Document};
489    /// let mut db = Db::open(&path)?;
490    /// let mut doc = Document::new();
491    /// doc.set("k", "v");
492    /// let id = db.insert(doc)?;
493    /// assert!(db.contains(id));
494    /// # let _ = std::fs::remove_file(&path);
495    /// # Ok(())
496    /// # }
497    /// ```
498    pub fn insert(&mut self, doc: Document) -> Result<DocId> {
499        let id = self.next_id;
500        self.append(OP_PUT, id, Some(&doc))?;
501        self.next_id = id + 1;
502        self.index_add(id, &doc);
503        Ok(DocId(id))
504    }
505
506    /// Reads the document stored under `id`, or `None` if no live document has
507    /// that id.
508    ///
509    /// # Errors
510    ///
511    /// Returns [`Error::Io`] if the body cannot be read, or [`Error::Corrupt`]
512    /// if the stored bytes fail to decode (which a passing checksum makes
513    /// unexpected in practice).
514    ///
515    /// # Examples
516    ///
517    /// ```
518    /// # fn main() -> bison_db::Result<()> {
519    /// # let path = std::env::temp_dir().join("bison_db_get_example.bison");
520    /// # let _ = std::fs::remove_file(&path);
521    /// use bison_db::{Db, Document, DocId};
522    /// let mut db = Db::open(&path)?;
523    /// let id = db.insert({ let mut d = Document::new(); d.set("n", 1_i64); d })?;
524    /// assert!(db.get(id)?.is_some());
525    /// assert!(db.get(DocId::from(9999))?.is_none());
526    /// # let _ = std::fs::remove_file(&path);
527    /// # Ok(())
528    /// # }
529    /// ```
530    pub fn get(&self, id: DocId) -> Result<Option<Document>> {
531        match self.index.get(&id.0).copied() {
532            Some(loc) => self.read_body(loc).map(Some),
533            None => Ok(None),
534        }
535    }
536
537    /// Overwrites the document stored under `id` with `doc`, returning `true` if
538    /// a document was present to overwrite and `false` otherwise.
539    ///
540    /// A successful update appends a new record and repoints the index; the
541    /// previous body remains in the file as dead space until compaction.
542    ///
543    /// # Errors
544    ///
545    /// Returns [`Error::ValueTooLarge`] or [`Error::Io`] under the same
546    /// conditions as [`insert`](Db::insert).
547    ///
548    /// # Examples
549    ///
550    /// ```
551    /// # fn main() -> bison_db::Result<()> {
552    /// # let path = std::env::temp_dir().join("bison_db_update_example.bison");
553    /// # let _ = std::fs::remove_file(&path);
554    /// use bison_db::{Db, Document, DocId};
555    /// let mut db = Db::open(&path)?;
556    /// let id = db.insert({ let mut d = Document::new(); d.set("v", 1_i64); d })?;
557    ///
558    /// let mut next = Document::new();
559    /// next.set("v", 2_i64);
560    /// assert!(db.update(id, next)?);
561    /// assert!(!db.update(DocId::from(404), Document::new())?);
562    /// # let _ = std::fs::remove_file(&path);
563    /// # Ok(())
564    /// # }
565    /// ```
566    pub fn update(&mut self, id: DocId, doc: Document) -> Result<bool> {
567        let Some(loc) = self.index.get(&id.0).copied() else {
568            return Ok(false);
569        };
570        if !self.indexes.is_empty() {
571            let old = self.read_body(loc)?;
572            self.index_remove(id.0, &old);
573        }
574        self.append(OP_PUT, id.0, Some(&doc))?;
575        self.index_add(id.0, &doc);
576        Ok(true)
577    }
578
579    /// Deletes the document stored under `id`, returning `true` if one was
580    /// present and `false` otherwise.
581    ///
582    /// A tombstone is appended so the deletion survives reopening; the document
583    /// is unreadable as soon as this returns.
584    ///
585    /// # Errors
586    ///
587    /// Returns [`Error::Io`] if the tombstone cannot be appended.
588    ///
589    /// # Examples
590    ///
591    /// ```
592    /// # fn main() -> bison_db::Result<()> {
593    /// # let path = std::env::temp_dir().join("bison_db_delete_example.bison");
594    /// # let _ = std::fs::remove_file(&path);
595    /// use bison_db::{Db, Document};
596    /// let mut db = Db::open(&path)?;
597    /// let id = db.insert({ let mut d = Document::new(); d.set("x", 1_i64); d })?;
598    /// assert!(db.delete(id)?);
599    /// assert!(db.get(id)?.is_none());
600    /// assert!(!db.delete(id)?);
601    /// # let _ = std::fs::remove_file(&path);
602    /// # Ok(())
603    /// # }
604    /// ```
605    pub fn delete(&mut self, id: DocId) -> Result<bool> {
606        let Some(loc) = self.index.get(&id.0).copied() else {
607            return Ok(false);
608        };
609        if !self.indexes.is_empty() {
610            let old = self.read_body(loc)?;
611            self.index_remove(id.0, &old);
612        }
613        self.append(OP_DELETE, id.0, None)?;
614        Ok(true)
615    }
616
617    /// Returns `true` if a live document has this `id`.
618    ///
619    /// This is an in-memory index lookup with no file access.
620    ///
621    /// # Examples
622    ///
623    /// ```
624    /// # fn main() -> bison_db::Result<()> {
625    /// # let path = std::env::temp_dir().join("bison_db_contains_example.bison");
626    /// # let _ = std::fs::remove_file(&path);
627    /// use bison_db::{Db, Document};
628    /// let mut db = Db::open(&path)?;
629    /// let id = db.insert(Document::new())?;
630    /// assert!(db.contains(id));
631    /// # let _ = std::fs::remove_file(&path);
632    /// # Ok(())
633    /// # }
634    /// ```
635    #[must_use]
636    pub fn contains(&self, id: DocId) -> bool {
637        self.index.contains_key(&id.0)
638    }
639
640    /// Returns the number of live documents.
641    ///
642    /// # Examples
643    ///
644    /// ```
645    /// # fn main() -> bison_db::Result<()> {
646    /// # let path = std::env::temp_dir().join("bison_db_len_example.bison");
647    /// # let _ = std::fs::remove_file(&path);
648    /// use bison_db::{Db, Document};
649    /// let mut db = Db::open(&path)?;
650    /// db.insert(Document::new())?;
651    /// assert_eq!(db.len(), 1);
652    /// # let _ = std::fs::remove_file(&path);
653    /// # Ok(())
654    /// # }
655    /// ```
656    #[must_use]
657    pub fn len(&self) -> usize {
658        self.index.len()
659    }
660
661    /// Returns `true` if the store holds no live documents.
662    ///
663    /// # Examples
664    ///
665    /// ```
666    /// # fn main() -> bison_db::Result<()> {
667    /// # let path = std::env::temp_dir().join("bison_db_isempty_example.bison");
668    /// # let _ = std::fs::remove_file(&path);
669    /// let db = bison_db::Db::open(&path)?;
670    /// assert!(db.is_empty());
671    /// # let _ = std::fs::remove_file(&path);
672    /// # Ok(())
673    /// # }
674    /// ```
675    #[must_use]
676    pub fn is_empty(&self) -> bool {
677        self.index.is_empty()
678    }
679
680    /// Returns an iterator over the ids of all live documents.
681    ///
682    /// The order is unspecified and may change between runs; collect and sort if
683    /// you need a stable order.
684    ///
685    /// # Examples
686    ///
687    /// ```
688    /// # fn main() -> bison_db::Result<()> {
689    /// # let path = std::env::temp_dir().join("bison_db_ids_example.bison");
690    /// # let _ = std::fs::remove_file(&path);
691    /// use bison_db::{Db, Document};
692    /// let mut db = Db::open(&path)?;
693    /// db.insert(Document::new())?;
694    /// db.insert(Document::new())?;
695    /// assert_eq!(db.ids().count(), 2);
696    /// # let _ = std::fs::remove_file(&path);
697    /// # Ok(())
698    /// # }
699    /// ```
700    pub fn ids(&self) -> impl Iterator<Item = DocId> + '_ {
701        self.index.keys().copied().map(DocId)
702    }
703
    /// `fsync`s the file, making every preceding write durable against power
    /// loss. Writes are never held in a userspace buffer, so there is nothing
    /// else to flush first.
706    ///
707    /// # Errors
708    ///
709    /// Returns [`Error::Io`] if the sync fails.
710    ///
711    /// # Examples
712    ///
713    /// ```
714    /// # fn main() -> bison_db::Result<()> {
715    /// # let path = std::env::temp_dir().join("bison_db_flush_example.bison");
716    /// # let _ = std::fs::remove_file(&path);
717    /// use bison_db::{Db, Document};
718    /// let mut db = Db::open(&path)?;
719    /// db.insert(Document::new())?;
720    /// db.flush()?;
721    /// # let _ = std::fs::remove_file(&path);
722    /// # Ok(())
723    /// # }
724    /// ```
725    pub fn flush(&mut self) -> Result<()> {
726        self.file.sync_all()?;
727        Ok(())
728    }
729
730    /// Returns the path the store was opened from.
731    ///
732    /// # Examples
733    ///
734    /// ```
735    /// # fn main() -> bison_db::Result<()> {
736    /// # let path = std::env::temp_dir().join("bison_db_path_example.bison");
737    /// # let _ = std::fs::remove_file(&path);
738    /// let db = bison_db::Db::open(&path)?;
739    /// assert_eq!(db.path(), path.as_path());
740    /// # let _ = std::fs::remove_file(&path);
741    /// # Ok(())
742    /// # }
743    /// ```
744    #[must_use]
745    pub fn path(&self) -> &Path {
746        &self.path
747    }
748
749    /// Returns a [`Stats`] snapshot of the store's size and live contents.
750    ///
751    /// # Examples
752    ///
753    /// ```
754    /// # fn main() -> bison_db::Result<()> {
755    /// # let path = std::env::temp_dir().join("bison_db_stats_example.bison");
756    /// # let _ = std::fs::remove_file(&path);
757    /// use bison_db::{Db, Document};
758    /// let mut db = Db::open(&path)?;
759    /// db.insert(Document::new())?;
760    /// assert_eq!(db.stats().live_documents, 1);
761    /// # let _ = std::fs::remove_file(&path);
762    /// # Ok(())
763    /// # }
764    /// ```
765    #[must_use]
766    pub fn stats(&self) -> Stats {
767        let live_bytes = self.index.values().map(|loc| u64::from(loc.len)).sum();
768        Stats {
769            live_documents: self.index.len(),
770            file_bytes: self.tail,
771            live_bytes,
772        }
773    }
774
775    /// Builds a secondary index over `field`, making [`find`](Db::find) and
776    /// [`range`](Db::range) on that field fast point and range lookups instead of
777    /// full scans.
778    ///
779    /// The index is built by reading every live document once and recording its
780    /// value for `field`; documents without the field are skipped. From then on,
781    /// it is maintained automatically on every insert, update, and delete. Any
782    /// number of fields may be indexed — call this once per field.
783    ///
784    /// Indexes live in memory only and are **not** persisted: after reopening a
785    /// store, call this again for each field you want indexed. Calling it for a
786    /// field that is already indexed is a no-op.
787    ///
788    /// # Errors
789    ///
790    /// Returns [`Error::Io`] or [`Error::Corrupt`] if a document cannot be read
791    /// while building the index.
792    ///
793    /// # Examples
794    ///
795    /// ```
796    /// # fn main() -> bison_db::Result<()> {
797    /// # let path = std::env::temp_dir().join("bison_db_createindex_example.bison");
798    /// # let _ = std::fs::remove_file(&path);
799    /// use bison_db::{Db, Document, Value};
800    /// let mut db = Db::open(&path)?;
801    /// db.insert({ let mut d = Document::new(); d.set("city", "Oslo"); d })?;
802    ///
803    /// db.create_index("city")?;
804    /// let hits = db.find("city", &Value::from("Oslo"))?;
805    /// assert_eq!(hits.len(), 1);
806    /// # let _ = std::fs::remove_file(&path);
807    /// # Ok(())
808    /// # }
809    /// ```
810    pub fn create_index(&mut self, field: &str) -> Result<()> {
811        if self.indexes.contains_key(field) {
812            return Ok(());
813        }
814        let mut index = SecondaryIndex::new();
815        let entries: Vec<(u64, BodyLoc)> = self.index.iter().map(|(id, loc)| (*id, *loc)).collect();
816        for (id, loc) in entries {
817            let doc = self.read_body(loc)?;
818            if let Some(value) = doc.get(field) {
819                index.add(value, id);
820            }
821        }
822        let _ = self.indexes.insert(field.to_string(), index);
823        Ok(())
824    }
825
826    /// Drops the secondary index over `field`, returning `true` if one existed.
827    ///
828    /// # Examples
829    ///
830    /// ```
831    /// # fn main() -> bison_db::Result<()> {
832    /// # let path = std::env::temp_dir().join("bison_db_dropindex_example.bison");
833    /// # let _ = std::fs::remove_file(&path);
834    /// let mut db = bison_db::Db::open(&path)?;
835    /// db.create_index("name")?;
836    /// assert!(db.drop_index("name"));
837    /// assert!(!db.drop_index("name"));
838    /// # let _ = std::fs::remove_file(&path);
839    /// # Ok(())
840    /// # }
841    /// ```
842    pub fn drop_index(&mut self, field: &str) -> bool {
843        self.indexes.remove(field).is_some()
844    }
845
846    /// Returns an iterator over the names of the currently indexed fields.
847    ///
848    /// The order is unspecified.
849    ///
850    /// # Examples
851    ///
852    /// ```
853    /// # fn main() -> bison_db::Result<()> {
854    /// # let path = std::env::temp_dir().join("bison_db_indexes_example.bison");
855    /// # let _ = std::fs::remove_file(&path);
856    /// let mut db = bison_db::Db::open(&path)?;
857    /// db.create_index("a")?;
858    /// db.create_index("b")?;
859    /// assert_eq!(db.indexes().count(), 2);
860    /// # let _ = std::fs::remove_file(&path);
861    /// # Ok(())
862    /// # }
863    /// ```
864    pub fn indexes(&self) -> impl Iterator<Item = &str> {
865        self.indexes.keys().map(String::as_str)
866    }
867
868    /// Returns the ids of all live documents whose `field` equals `value`.
869    ///
870    /// If `field` is indexed (see [`create_index`](Db::create_index)) this is a
871    /// point lookup; otherwise it falls back to scanning every live document, so
872    /// the result is correct either way — the index only changes the speed.
873    /// Equality follows the same total order the indexes use, so a `Float` field
874    /// distinguishes `0.0` from `-0.0`.
875    ///
876    /// # Errors
877    ///
878    /// Returns [`Error::Io`] or [`Error::Corrupt`] if a document must be read
879    /// (the unindexed path) and cannot be.
880    ///
881    /// # Examples
882    ///
883    /// ```
884    /// # fn main() -> bison_db::Result<()> {
885    /// # let path = std::env::temp_dir().join("bison_db_find_example.bison");
886    /// # let _ = std::fs::remove_file(&path);
887    /// use bison_db::{Db, Document, Value};
888    /// let mut db = Db::open(&path)?;
889    /// db.insert({ let mut d = Document::new(); d.set("role", "admin"); d })?;
890    /// db.insert({ let mut d = Document::new(); d.set("role", "user"); d })?;
891    /// db.create_index("role")?;
892    ///
893    /// assert_eq!(db.find("role", &Value::from("admin"))?.len(), 1);
894    /// assert!(db.find("role", &Value::from("ghost"))?.is_empty());
895    /// # let _ = std::fs::remove_file(&path);
896    /// # Ok(())
897    /// # }
898    /// ```
899    pub fn find(&self, field: &str, value: &Value) -> Result<Vec<DocId>> {
900        if let Some(index) = self.indexes.get(field) {
901            return Ok(index.equal(value).into_iter().map(DocId).collect());
902        }
903        let mut out = Vec::new();
904        for (id, loc) in &self.index {
905            let doc = self.read_body(*loc)?;
906            if doc
907                .get(field)
908                .is_some_and(|v| total_cmp_value(v, value) == core::cmp::Ordering::Equal)
909            {
910                out.push(DocId(*id));
911            }
912        }
913        Ok(out)
914    }
915
916    /// Returns the ids of all live documents whose `field` falls within `range`.
917    ///
918    /// Bounds are [`Value`]s compared with the same total order the indexes use;
919    /// any [`RangeBounds`] form works (`a..b`, `a..=b`, `..b`, `a..`, `..`).
920    /// If `field` is indexed the matches come back ordered by field value (then
921    /// id); otherwise the store scans every live document. As with
922    /// [`find`](Db::find), the index changes only the speed, not the result.
923    ///
924    /// # Errors
925    ///
926    /// Returns [`Error::Io`] or [`Error::Corrupt`] if a document must be read
927    /// (the unindexed path) and cannot be.
928    ///
929    /// # Examples
930    ///
931    /// ```
932    /// # fn main() -> bison_db::Result<()> {
933    /// # let path = std::env::temp_dir().join("bison_db_range_example.bison");
934    /// # let _ = std::fs::remove_file(&path);
935    /// use bison_db::{Db, Document, Value};
936    /// let mut db = Db::open(&path)?;
937    /// for age in [17_i64, 25, 40, 70] {
938    ///     db.insert({ let mut d = Document::new(); d.set("age", age); d })?;
939    /// }
940    /// db.create_index("age")?;
941    ///
942    /// // Working-age adults: 18..=65.
943    /// let hits = db.range("age", Value::from(18_i64)..=Value::from(65_i64))?;
944    /// assert_eq!(hits.len(), 2); // 25 and 40
945    /// # let _ = std::fs::remove_file(&path);
946    /// # Ok(())
947    /// # }
948    /// ```
949    pub fn range<R: RangeBounds<Value>>(&self, field: &str, range: R) -> Result<Vec<DocId>> {
950        let lo = range.start_bound();
951        let hi = range.end_bound();
952        if let Some(index) = self.indexes.get(field) {
953            return Ok(index.range(lo, hi).into_iter().map(DocId).collect());
954        }
955        let mut out = Vec::new();
956        for (id, loc) in &self.index {
957            let doc = self.read_body(*loc)?;
958            if doc.get(field).is_some_and(|v| in_bounds(v, lo, hi)) {
959                out.push(DocId(*id));
960            }
961        }
962        Ok(out)
963    }
964
965    /// Reads and decodes the document body at `loc`.
966    fn read_body(&self, loc: BodyLoc) -> Result<Document> {
967        let mut buf = vec![0u8; loc.len as usize];
968        read_exact_at(&self.file, &mut buf, loc.offset)?;
969        decode_document(&buf)
970    }
971
972    /// Adds document `id`'s indexed field values to every secondary index.
973    fn index_add(&mut self, id: u64, doc: &Document) {
974        for (field, index) in &mut self.indexes {
975            if let Some(value) = doc.get(field) {
976                index.add(value, id);
977            }
978        }
979    }
980
981    /// Removes document `id`'s indexed field values from every secondary index.
982    fn index_remove(&mut self, id: u64, doc: &Document) {
983        for (field, index) in &mut self.indexes {
984            if let Some(value) = doc.get(field) {
985                index.remove(value, id);
986            }
987        }
988    }
989
990    /// Appends one framed record and updates the index accordingly.
991    ///
992    /// For [`OP_PUT`] the body is encoded and the index repointed at it; for
993    /// [`OP_DELETE`] the index entry is removed. The frame is built in `scratch`
994    /// so the steady-state write path performs no per-record allocation.
995    fn append(&mut self, op: u8, id: u64, doc: Option<&Document>) -> Result<()> {
996        self.scratch.clear();
997        // Reserve the frame header; the length and checksum are backfilled once
998        // the payload is known.
999        self.scratch.extend_from_slice(&[0u8; FRAME_LEN]);
1000        self.scratch.push(op);
1001        self.scratch.extend_from_slice(&id.to_le_bytes());
1002        if let Some(doc) = doc {
1003            encode_document_into(&mut self.scratch, doc)?;
1004        }
1005
1006        let payload_len = self.scratch.len() - FRAME_LEN;
1007        if payload_len > MAX_RECORD_BYTES {
1008            return Err(Error::ValueTooLarge);
1009        }
1010        let crc = crc32c(&self.scratch[FRAME_LEN..]);
1011        self.scratch[0..4].copy_from_slice(&(payload_len as u32).to_le_bytes());
1012        self.scratch[4..8].copy_from_slice(&crc.to_le_bytes());
1013
1014        write_all_at(&self.file, &self.scratch, self.tail)?;
1015
1016        let record_start = self.tail;
1017        self.tail += (FRAME_LEN + payload_len) as u64;
1018
1019        match op {
1020            OP_PUT => {
1021                let offset = record_start + FRAME_LEN as u64 + MIN_PAYLOAD as u64;
1022                let len = (payload_len - MIN_PAYLOAD) as u32;
1023                let _ = self.index.insert(id, BodyLoc { offset, len });
1024            }
1025            OP_DELETE => {
1026                let _ = self.index.remove(&id);
1027            }
1028            _ => {}
1029        }
1030
1031        if self.sync == SyncPolicy::Always {
1032            self.file.sync_all()?;
1033        }
1034        Ok(())
1035    }
1036
1037    /// Writes the 16-byte file header at offset 0 and syncs it, establishing a
1038    /// valid empty store.
1039    fn write_header(&mut self) -> Result<()> {
1040        let mut header = [0u8; HEADER_LEN as usize];
1041        header[0..8].copy_from_slice(&HEADER_MAGIC);
1042        header[8..10].copy_from_slice(&FORMAT_VERSION.to_le_bytes());
1043        write_all_at(&self.file, &header, 0)?;
1044        self.file.sync_all()?;
1045        Ok(())
1046    }
1047
1048    /// Validates the header of an existing file: length, magic, and version.
1049    fn verify_header(&self, file_len: u64) -> Result<()> {
1050        if file_len < HEADER_LEN {
1051            return Err(Error::BadMagic);
1052        }
1053        let mut header = [0u8; HEADER_LEN as usize];
1054        read_exact_at(&self.file, &mut header, 0)?;
1055        if header[0..8] != HEADER_MAGIC {
1056            return Err(Error::BadMagic);
1057        }
1058        let version = u16::from_le_bytes([header[8], header[9]]);
1059        if version > FORMAT_VERSION {
1060            return Err(Error::UnsupportedVersion(version));
1061        }
1062        Ok(())
1063    }
1064
1065    /// Scans every record after the header, rebuilding the index and truncating
1066    /// a torn record at the tail if one is found.
1067    fn replay(&mut self, file_len: u64) -> Result<()> {
1068        let mut offset = HEADER_LEN;
1069        let mut frame = [0u8; FRAME_LEN];
1070
1071        loop {
1072            if offset + FRAME_LEN as u64 > file_len {
1073                break;
1074            }
1075            read_exact_at(&self.file, &mut frame, offset)?;
1076            let payload_len = u32::from_le_bytes([frame[0], frame[1], frame[2], frame[3]]) as usize;
1077            let expected_crc = u32::from_le_bytes([frame[4], frame[5], frame[6], frame[7]]);
1078
1079            if !(MIN_PAYLOAD..=MAX_RECORD_BYTES).contains(&payload_len) {
1080                // A length this size at the tail is an incomplete write; mid-file
1081                // it is corruption. Either way the run of valid records ends here.
1082                break;
1083            }
1084            let record_end = offset + FRAME_LEN as u64 + payload_len as u64;
1085            if record_end > file_len {
1086                break;
1087            }
1088
1089            let mut payload = vec![0u8; payload_len];
1090            read_exact_at(&self.file, &mut payload, offset + FRAME_LEN as u64)?;
1091            if crc32c(&payload) != expected_crc {
1092                if record_end == file_len {
1093                    // Torn final record: drop it and stop.
1094                    break;
1095                }
1096                return Err(Error::Corrupt("crc mismatch"));
1097            }
1098
1099            let op = payload[0];
1100            let id = u64::from_le_bytes([
1101                payload[1], payload[2], payload[3], payload[4], payload[5], payload[6], payload[7],
1102                payload[8],
1103            ]);
1104
1105            match op {
1106                OP_PUT => {
1107                    let offset = offset + FRAME_LEN as u64 + MIN_PAYLOAD as u64;
1108                    let len = (payload_len - MIN_PAYLOAD) as u32;
1109                    let _ = self.index.insert(id, BodyLoc { offset, len });
1110                }
1111                OP_DELETE => {
1112                    let _ = self.index.remove(&id);
1113                }
1114                _ => return Err(Error::Corrupt("unknown record op")),
1115            }
1116            if id >= self.next_id {
1117                self.next_id = id + 1;
1118            }
1119            offset = record_end;
1120        }
1121
1122        if offset < file_len {
1123            // Trailing torn bytes: cut the file back to the last good record.
1124            self.file.set_len(offset)?;
1125        }
1126        self.tail = offset;
1127        Ok(())
1128    }
1129}
1130
1131impl fmt::Debug for Db {
1132    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1133        f.debug_struct("Db")
1134            .field("path", &self.path)
1135            .field("live_documents", &self.index.len())
1136            .field("file_bytes", &self.tail)
1137            .field("sync", &self.sync)
1138            .finish()
1139    }
1140}
1141
1142impl Drop for Db {
1143    /// Makes a best-effort `fsync` on a clean shutdown under
1144    /// [`SyncPolicy::Manual`], so a normal program exit does not lose writes that
1145    /// were never explicitly flushed. Under [`SyncPolicy::Always`] every write is
1146    /// already durable, so nothing is done. Any error here is ignored because a
1147    /// destructor cannot return one; call [`Db::flush`] before dropping when you
1148    /// need to observe a sync failure.
1149    fn drop(&mut self) {
1150        if self.sync == SyncPolicy::Manual {
1151            let _ = self.file.sync_all();
1152        }
1153    }
1154}
1155
1156/// Forces the directory containing `path` to disk, so the file's creation is
1157/// durable. On Unix this is a real `fsync` of the parent directory; on Windows,
1158/// directory handles do not support this and file-level `fsync` already persists
1159/// the entry, so this is a documented no-op.
1160#[cfg(unix)]
1161fn sync_parent_dir(path: &Path) -> Result<()> {
1162    let parent = path.parent().filter(|p| !p.as_os_str().is_empty());
1163    let dir = parent.unwrap_or_else(|| Path::new("."));
1164    let handle = File::open(dir)?;
1165    handle.sync_all()?;
1166    Ok(())
1167}
1168
1169/// Windows counterpart to [`sync_parent_dir`]: a no-op, because the file-level
1170/// `fsync` already makes the directory entry durable on this platform.
1171#[cfg(windows)]
1172fn sync_parent_dir(_path: &Path) -> Result<()> {
1173    Ok(())
1174}
1175
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use crate::value::Value;
    use std::sync::atomic::{AtomicU64, Ordering};

    /// Owns a unique scratch path and deletes the file there when dropped.
    ///
    /// Cleaning up in `Drop` rather than on the last line of each test means
    /// the file is removed even when an assertion fails part-way through.
    /// Declare the guard *before* the `Db` that uses it: locals are dropped in
    /// reverse order, so the store closes its handle first and the removal
    /// then succeeds on platforms that refuse to delete an open file.
    struct TempPath {
        path: PathBuf,
    }

    impl TempPath {
        /// Reserves a path unique to this process and call, removing any stale
        /// file a previous run may have left there.
        fn new() -> Self {
            static COUNTER: AtomicU64 = AtomicU64::new(0);
            let n = COUNTER.fetch_add(1, Ordering::Relaxed);
            let pid = std::process::id();
            let path = std::env::temp_dir().join(format!("bison_db_test_{pid}_{n}.bison"));
            let _ = std::fs::remove_file(&path);
            Self { path }
        }
    }

    impl Drop for TempPath {
        fn drop(&mut self) {
            // Best effort: the file may never have been created.
            let _ = std::fs::remove_file(&self.path);
        }
    }

    /// Builds a document from `(field, integer)` pairs.
    fn doc(pairs: &[(&str, i64)]) -> Document {
        let mut d = Document::new();
        for &(key, value) in pairs {
            d.set(key, value);
        }
        d
    }

    /// Returns the raw ids in ascending order, for order-insensitive comparison.
    fn sorted(mut ids: Vec<DocId>) -> Vec<u64> {
        ids.sort();
        ids.into_iter().map(DocId::get).collect()
    }

    #[test]
    fn test_temp_path_removes_file_on_drop() {
        let tmp = TempPath::new();
        let path = tmp.path.clone();
        std::fs::write(&path, b"scratch").unwrap();
        assert!(path.exists());
        drop(tmp);
        assert!(!path.exists());
    }

    #[test]
    fn test_insert_get_roundtrip() {
        let tmp = TempPath::new();
        let mut db = Db::open(&tmp.path).unwrap();
        let id = db.insert(doc(&[("a", 1), ("b", 2)])).unwrap();
        let got = db.get(id).unwrap().unwrap();
        assert_eq!(got.get("a").and_then(Value::as_int), Some(1));
    }

    #[test]
    fn test_get_missing_returns_none() {
        let tmp = TempPath::new();
        let db = Db::open(&tmp.path).unwrap();
        assert!(db.get(DocId::from(1)).unwrap().is_none());
    }

    #[test]
    fn test_delete_removes_document() {
        let tmp = TempPath::new();
        let mut db = Db::open(&tmp.path).unwrap();
        let id = db.insert(doc(&[("x", 9)])).unwrap();
        assert!(db.delete(id).unwrap());
        assert!(db.get(id).unwrap().is_none());
        assert!(!db.delete(id).unwrap());
    }

    #[test]
    fn test_update_changes_value() {
        let tmp = TempPath::new();
        let mut db = Db::open(&tmp.path).unwrap();
        let id = db.insert(doc(&[("v", 1)])).unwrap();
        assert!(db.update(id, doc(&[("v", 2)])).unwrap());
        assert_eq!(
            db.get(id)
                .unwrap()
                .unwrap()
                .get("v")
                .and_then(Value::as_int),
            Some(2)
        );
    }

    #[test]
    fn test_update_absent_id_is_false() {
        let tmp = TempPath::new();
        let mut db = Db::open(&tmp.path).unwrap();
        assert!(!db.update(DocId::from(7), Document::new()).unwrap());
    }

    #[test]
    fn test_reopen_recovers_state() {
        let tmp = TempPath::new();
        let (a, b);
        {
            let mut db = Db::open(&tmp.path).unwrap();
            a = db.insert(doc(&[("n", 10)])).unwrap();
            b = db.insert(doc(&[("n", 20)])).unwrap();
            db.delete(a).unwrap();
            db.flush().unwrap();
        }
        let db = Db::open(&tmp.path).unwrap();
        assert!(db.get(a).unwrap().is_none());
        assert_eq!(
            db.get(b).unwrap().unwrap().get("n").and_then(Value::as_int),
            Some(20)
        );
        assert_eq!(db.len(), 1);
    }

    #[test]
    fn test_reopen_continues_id_sequence() {
        let tmp = TempPath::new();
        let first;
        {
            let mut db = Db::open(&tmp.path).unwrap();
            first = db.insert(Document::new()).unwrap();
        }
        let mut db = Db::open(&tmp.path).unwrap();
        let second = db.insert(Document::new()).unwrap();
        assert!(second.get() > first.get());
    }

    #[test]
    fn test_open_rejects_foreign_file() {
        let tmp = TempPath::new();
        std::fs::write(&tmp.path, b"this is definitely not a bison-db file at all").unwrap();
        assert!(matches!(Db::open(&tmp.path), Err(Error::BadMagic)));
    }

    #[test]
    fn test_torn_tail_is_truncated_on_open() {
        let tmp = TempPath::new();
        let keep;
        {
            let mut db = Db::open(&tmp.path).unwrap();
            keep = db.insert(doc(&[("ok", 1)])).unwrap();
            db.flush().unwrap();
        }
        // Append a bogus frame claiming a payload longer than what follows.
        {
            use std::io::Write;
            let mut f = OpenOptions::new().append(true).open(&tmp.path).unwrap();
            let mut frame = Vec::new();
            frame.extend_from_slice(&999u32.to_le_bytes());
            frame.extend_from_slice(&0u32.to_le_bytes());
            frame.extend_from_slice(b"short");
            f.write_all(&frame).unwrap();
            f.flush().unwrap();
        }
        let db = Db::open(&tmp.path).unwrap();
        assert!(db.get(keep).unwrap().is_some());
        assert_eq!(db.len(), 1);
    }

    #[test]
    fn test_stats_reflect_live_documents() {
        let tmp = TempPath::new();
        let mut db = Db::open(&tmp.path).unwrap();
        db.insert(doc(&[("a", 1)])).unwrap();
        let id = db.insert(doc(&[("b", 2)])).unwrap();
        db.delete(id).unwrap();
        let stats = db.stats();
        assert_eq!(stats.live_documents, 1);
        assert!(stats.file_bytes > HEADER_LEN);
    }

    #[test]
    fn test_create_index_then_find() {
        let tmp = TempPath::new();
        let mut db = Db::open(&tmp.path).unwrap();
        let a = db.insert(doc(&[("g", 1)])).unwrap();
        let b = db.insert(doc(&[("g", 2)])).unwrap();
        let c = db.insert(doc(&[("g", 1)])).unwrap();

        db.create_index("g").unwrap();
        assert_eq!(
            sorted(db.find("g", &Value::from(1_i64)).unwrap()),
            sorted(vec![a, c])
        );
        assert_eq!(
            sorted(db.find("g", &Value::from(2_i64)).unwrap()),
            vec![b.get()]
        );
        assert!(db.find("g", &Value::from(9_i64)).unwrap().is_empty());
    }

    #[test]
    fn test_find_indexed_matches_scan() {
        let tmp = TempPath::new();
        let mut db = Db::open(&tmp.path).unwrap();
        for n in [1, 2, 2, 3, 2] {
            db.insert(doc(&[("k", n)])).unwrap();
        }
        let scan = sorted(db.find("k", &Value::from(2_i64)).unwrap()); // no index yet
        db.create_index("k").unwrap();
        let indexed = sorted(db.find("k", &Value::from(2_i64)).unwrap());
        assert_eq!(scan, indexed);
        assert_eq!(scan.len(), 3);
    }

    #[test]
    fn test_range_query_inclusive_and_exclusive() {
        let tmp = TempPath::new();
        let mut db = Db::open(&tmp.path).unwrap();
        for n in [10, 20, 30, 40] {
            db.insert(doc(&[("age", n)])).unwrap();
        }
        db.create_index("age").unwrap();
        assert_eq!(
            db.range("age", Value::from(20_i64)..=Value::from(30_i64))
                .unwrap()
                .len(),
            2
        );
        assert_eq!(
            db.range("age", Value::from(20_i64)..Value::from(40_i64))
                .unwrap()
                .len(),
            2
        );
        assert_eq!(db.range("age", Value::from(25_i64)..).unwrap().len(), 2);
    }

    #[test]
    fn test_index_maintained_on_update_and_delete() {
        let tmp = TempPath::new();
        let mut db = Db::open(&tmp.path).unwrap();
        let id = db.insert(doc(&[("status", 1)])).unwrap();
        db.create_index("status").unwrap();
        assert_eq!(db.find("status", &Value::from(1_i64)).unwrap(), vec![id]);

        db.update(id, doc(&[("status", 2)])).unwrap();
        assert!(db.find("status", &Value::from(1_i64)).unwrap().is_empty());
        assert_eq!(db.find("status", &Value::from(2_i64)).unwrap(), vec![id]);

        db.delete(id).unwrap();
        assert!(db.find("status", &Value::from(2_i64)).unwrap().is_empty());
    }

    #[test]
    fn test_indexes_listed_and_dropped() {
        let tmp = TempPath::new();
        let mut db = Db::open(&tmp.path).unwrap();
        db.create_index("a").unwrap();
        db.create_index("b").unwrap();
        db.create_index("a").unwrap(); // idempotent
        let mut names: Vec<&str> = db.indexes().collect();
        names.sort_unstable();
        assert_eq!(names, ["a", "b"]);
        assert!(db.drop_index("a"));
        assert!(!db.drop_index("a"));
        assert_eq!(db.indexes().count(), 1);
    }

    #[test]
    fn test_index_not_persisted_but_rebuildable_after_reopen() {
        let tmp = TempPath::new();
        let id;
        {
            let mut db = Db::open(&tmp.path).unwrap();
            id = db.insert(doc(&[("city", 7)])).unwrap();
            db.create_index("city").unwrap();
            db.flush().unwrap();
        }
        let mut db = Db::open(&tmp.path).unwrap();
        assert_eq!(db.indexes().count(), 0); // indexes are not on disk
        db.create_index("city").unwrap(); // rebuild from the log
        assert_eq!(db.find("city", &Value::from(7_i64)).unwrap(), vec![id]);
    }

    #[test]
    fn test_default_sync_policy_is_manual() {
        let tmp = TempPath::new();
        let db = Db::open(&tmp.path).unwrap();
        assert_eq!(db.sync_policy(), SyncPolicy::Manual);
    }

    #[test]
    fn test_options_set_always_sync_policy() {
        let tmp = TempPath::new();
        let mut db = Db::open_with(&tmp.path, DbOptions::new().sync(SyncPolicy::Always)).unwrap();
        assert_eq!(db.sync_policy(), SyncPolicy::Always);
        // Every write fsyncs; data is present and reopen recovers it.
        let id = db.insert(doc(&[("v", 1)])).unwrap();
        assert!(db.get(id).unwrap().is_some());
        drop(db);
        let db = Db::open(&tmp.path).unwrap();
        assert_eq!(db.len(), 1);
    }

    #[test]
    fn test_always_sync_persists_without_explicit_flush() {
        let tmp = TempPath::new();
        let id;
        {
            let mut db =
                Db::open_with(&tmp.path, DbOptions::new().sync(SyncPolicy::Always)).unwrap();
            id = db.insert(doc(&[("durable", 1)])).unwrap();
            // No flush() call: Always already synced each write.
        }
        let db = Db::open(&tmp.path).unwrap();
        assert!(db.get(id).unwrap().is_some());
    }

    #[test]
    fn test_dboptions_open_matches_db_open() {
        let tmp = TempPath::new();
        let db = DbOptions::new().open(&tmp.path).unwrap();
        assert_eq!(db.sync_policy(), SyncPolicy::Manual);
    }
}