bison_db/store.rs
1//! The single-file document store: [`Db`].
2//!
3//! `bison-db` persists documents to one append-only file. Every write — insert,
4//! overwrite, or delete — appends a self-describing record to the tail; the file
5//! is never edited in place. An in-memory index maps each live document id to
6//! the byte offset of its most recent record, so a read is one hash lookup and
7//! one positional read. This log-structured design makes writes sequential
8//! (the pattern disks and SSDs serve fastest) and keeps a crash from corrupting
9//! data already on disk: a half-written record at the tail is detected by its
10//! length and checksum and dropped on the next open.
11//!
12//! ## Record framing
13//!
14//! The file opens with a fixed header (magic plus a format version), then a run
15//! of records. Each record is an 8-byte frame (`u32` payload length, `u32`
16//! CRC-32C of the payload) followed by the payload itself: a one-byte operation
17//! tag, the 8-byte document id, and — for an insert or overwrite — the encoded
18//! document body. A delete writes a tombstone with no body.
19//!
20//! ## Durability
21//!
22//! A record reaches the OS page cache as soon as it is written, so it is visible
23//! to later reads in the same process immediately. When it becomes durable
24//! against a power loss is governed by the store's [`SyncPolicy`]:
25//!
26//! - [`SyncPolicy::Always`] forces an `fsync` after every write, so each
27//! operation is durable the moment it returns.
28//! - [`SyncPolicy::Manual`] (the default) syncs only on [`Db::flush`] and once,
29//! best-effort, on drop. It is faster, and writes remain crash-*safe* — a torn
30//! write is never misread — but the most recent unsynced writes can be lost on
31//! power loss.
32//!
33//! Either way the on-disk invariant holds: a crash never tears a record that was
34//! already durable. On a newly created file, the parent directory is `fsync`ed
35//! so the file's existence is itself durable.
36
37use std::collections::HashMap;
38use std::fmt;
39use std::fs::{File, OpenOptions};
40use std::ops::RangeBounds;
41use std::path::{Path, PathBuf};
42
43use crate::codec::{crc32c, decode_document, encode_document_into};
44use crate::error::{Error, Result};
45use crate::index::{SecondaryIndex, in_bounds, total_cmp_value};
46use crate::sys::{read_exact_at, write_all_at};
47use crate::value::{Document, Value};
48
/// The largest record payload the store will write or accept while reading
/// (64 MiB).
///
/// A document encodes to at most this many bytes; a larger one is rejected with
/// [`Error::ValueTooLarge`] on write. On read, any framed length above this cap
/// is treated as corruption, which bounds the allocation the recovery path can
/// be asked to make from a damaged file.
pub const MAX_RECORD_BYTES: usize = 64 * 1024 * 1024;

/// Magic bytes at the start of every store file. The trailing digit tracks the
/// header layout, distinct from the format version that follows it.
const HEADER_MAGIC: [u8; 8] = *b"BISONDB1";

/// On-disk format version. Frozen at `1` as of v0.4.0: the layout described in
/// `docs/FORMAT.md` is stable, and files written by 0.2.0 onward are readable by
/// every later release. Bumped only on an incompatible record-layout change,
/// which would be a major-version event.
const FORMAT_VERSION: u16 = 1;

/// Length of the file header: 8 magic bytes, a `u16` version, 6 reserved bytes
/// (8 + 2 + 6 = 16). A freshly constructed store starts its append offset here,
/// so the first record sits immediately after the header.
const HEADER_LEN: u64 = 16;

/// Size of a record frame: a `u32` length followed by a `u32` checksum
/// (4 + 4 bytes).
const FRAME_LEN: usize = 8;

/// Smallest legal payload: a one-byte op tag plus an 8-byte id, with no body
/// (the shape of a delete tombstone).
const MIN_PAYLOAD: usize = 1 + 8;

/// Operation tag for an insert or overwrite: the payload carries a document body.
/// Written by both [`Db::insert`] and [`Db::update`].
const OP_PUT: u8 = 1;

/// Operation tag for a delete: the payload is the op tag and id only.
/// Written by [`Db::delete`].
const OP_DELETE: u8 = 2;
82
83/// A document's primary key within a [`Db`].
84///
85/// Ids are assigned by [`Db::insert`] as a dense, monotonically increasing
86/// sequence starting at 1; `0` is never assigned and can be used as a sentinel.
87/// The id is stable for the life of the document and survives reopening the
88/// file. Reconstruct one with [`DocId::from`] when you have stored it elsewhere.
89///
90/// # Examples
91///
92/// ```
93/// use bison_db::DocId;
94/// let id = DocId::from(7);
95/// assert_eq!(id.get(), 7);
96/// assert_eq!(id.to_string(), "7");
97/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DocId(u64);

impl DocId {
    /// Returns the underlying `u64`.
    ///
    /// # Examples
    ///
    /// ```
    /// use bison_db::DocId;
    /// assert_eq!(DocId::from(42).get(), 42);
    /// ```
    #[inline]
    #[must_use]
    pub const fn get(self) -> u64 {
        self.0
    }
}

/// Wraps a raw `u64` as a [`DocId`]; no validation is performed.
impl From<u64> for DocId {
    #[inline]
    fn from(raw: u64) -> Self {
        DocId(raw)
    }
}

/// Unwraps a [`DocId`] back into its raw `u64`.
impl From<DocId> for u64 {
    #[inline]
    fn from(id: DocId) -> Self {
        id.0
    }
}

/// Formats the id exactly as its underlying `u64` would be formatted.
impl fmt::Display for DocId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegate to `u64`'s implementation rather than `write!(f, "{}", ..)`:
        // the macro builds a fresh format spec and would silently discard the
        // caller's width, fill, alignment and zero-padding flags
        // (e.g. `format!("{:>6}", id)`).
        fmt::Display::fmt(&self.0, f)
    }
}
136
/// Where a live document's body sits in the file.
///
/// Stored by value in the primary index; it is `Copy`, so lookups hand out a
/// copy and never hold a borrow of the index while the file is read.
#[derive(Clone, Copy)]
struct BodyLoc {
    /// Byte offset of the encoded document body. This is the position the
    /// positional read starts from when the body is loaded.
    offset: u64,
    /// Length of the encoded document body in bytes; sizes the read buffer and
    /// is what `Stats::live_bytes` sums over.
    len: u32,
}
145
146/// A point-in-time summary of a store's size and contents.
147///
148/// Returned by [`Db::stats`]. The gap between `file_bytes` and `live_bytes`
149/// (plus framing) is space held by superseded and deleted records — the slack a
150/// future compaction step will reclaim.
151///
152/// # Examples
153///
154/// ```no_run
155/// # fn main() -> bison_db::Result<()> {
156/// let db = bison_db::Db::open("data.bison")?;
157/// let stats = db.stats();
158/// println!("{} live documents in {} bytes", stats.live_documents, stats.file_bytes);
159/// # Ok(())
160/// # }
161/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Stats {
    /// Number of documents currently readable (the size of the primary index).
    pub live_documents: usize,
    /// Total size of the file on disk, in bytes. Reported from the store's
    /// append offset, i.e. the position at which the next record will be written.
    pub file_bytes: u64,
    /// Bytes occupied by the bodies of live documents, excluding framing: the
    /// sum of the encoded body lengths of every indexed document.
    pub live_bytes: u64,
}
171
172/// When a write is made durable on disk.
173///
174/// bison-db never holds writes in a userspace buffer — every write reaches the
175/// operating system immediately and is visible to later reads. This policy
176/// controls only when the store forces those bytes through the OS cache to the
177/// physical device with `fsync`, which is what protects them from a power loss.
178///
179/// # Examples
180///
181/// ```
182/// # fn main() -> bison_db::Result<()> {
183/// use bison_db::{DbOptions, SyncPolicy};
184/// # let path = std::env::temp_dir().join("bison_db_syncpolicy_doc.bison");
185/// # let _ = std::fs::remove_file(&path);
186/// // Durable per write, at the cost of an fsync on every insert/update/delete.
187/// let db = DbOptions::new().sync(SyncPolicy::Always).open(&path)?;
188/// # drop(db);
189/// # let _ = std::fs::remove_file(&path);
190/// # Ok(())
191/// # }
192/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
pub enum SyncPolicy {
    /// `fsync` after every write before it returns. Each insert, update, and
    /// delete is durable the moment the call completes, at the cost of one
    /// device sync per operation.
    Always,
    /// `fsync` only when [`Db::flush`] is called (and once, best-effort, when the
    /// store is dropped). Writes are still crash-*safe* — a torn write is never
    /// misread — but the most recent unsynced writes can be lost on power loss.
    /// This is the default, and the fastest policy.
    // `#[default]` makes this the value of `SyncPolicy::default()`, and hence
    // what `DbOptions::default()` carries.
    #[default]
    Manual,
}
206
207/// Options for opening a [`Db`], built fluently and finished with
208/// [`open`](DbOptions::open).
209///
210/// Use this when the default [`Db::open`] is not enough — currently, to choose a
211/// [`SyncPolicy`]. The set of options is intentionally small and will only grow
212/// additively.
213///
214/// # Examples
215///
216/// ```
217/// # fn main() -> bison_db::Result<()> {
218/// use bison_db::{DbOptions, SyncPolicy};
219/// # let path = std::env::temp_dir().join("bison_db_dboptions_doc.bison");
220/// # let _ = std::fs::remove_file(&path);
221/// let db = DbOptions::new().sync(SyncPolicy::Always).open(&path)?;
222/// assert_eq!(db.sync_policy(), SyncPolicy::Always);
223/// # drop(db);
224/// # let _ = std::fs::remove_file(&path);
225/// # Ok(())
226/// # }
227/// ```
228#[derive(Clone, Copy, Debug, Default)]
229pub struct DbOptions {
230 sync: SyncPolicy,
231}
232
233impl DbOptions {
234 /// Creates options with the defaults ([`SyncPolicy::Manual`]).
235 ///
236 /// # Examples
237 ///
238 /// ```
239 /// use bison_db::{DbOptions, SyncPolicy};
240 /// assert_eq!(DbOptions::new().build_sync_policy(), SyncPolicy::Manual);
241 /// ```
242 #[must_use]
243 pub fn new() -> Self {
244 DbOptions::default()
245 }
246
247 /// Sets the [`SyncPolicy`] for the store.
248 ///
249 /// # Examples
250 ///
251 /// ```
252 /// use bison_db::{DbOptions, SyncPolicy};
253 /// let opts = DbOptions::new().sync(SyncPolicy::Always);
254 /// assert_eq!(opts.build_sync_policy(), SyncPolicy::Always);
255 /// ```
256 #[must_use]
257 pub fn sync(mut self, policy: SyncPolicy) -> Self {
258 self.sync = policy;
259 self
260 }
261
262 /// Returns the [`SyncPolicy`] these options currently carry.
263 ///
264 /// # Examples
265 ///
266 /// ```
267 /// use bison_db::{DbOptions, SyncPolicy};
268 /// assert_eq!(DbOptions::new().build_sync_policy(), SyncPolicy::Manual);
269 /// ```
270 #[must_use]
271 pub fn build_sync_policy(&self) -> SyncPolicy {
272 self.sync
273 }
274
275 /// Opens (or creates) the store at `path` with these options.
276 ///
277 /// Equivalent to [`Db::open`] when the options are the defaults.
278 ///
279 /// # Errors
280 ///
281 /// Same as [`Db::open`].
282 ///
283 /// # Examples
284 ///
285 /// ```
286 /// # fn main() -> bison_db::Result<()> {
287 /// use bison_db::{DbOptions, SyncPolicy};
288 /// # let path = std::env::temp_dir().join("bison_db_dboptions_open_doc.bison");
289 /// # let _ = std::fs::remove_file(&path);
290 /// let db = DbOptions::new().sync(SyncPolicy::Always).open(&path)?;
291 /// # drop(db);
292 /// # let _ = std::fs::remove_file(&path);
293 /// # Ok(())
294 /// # }
295 /// ```
296 pub fn open<P: AsRef<Path>>(self, path: P) -> Result<Db> {
297 Db::open_inner(path.as_ref().to_path_buf(), self.sync)
298 }
299}
300
301/// An embedded document store backed by a single append-only file.
302///
303/// Open one with [`Db::open`], then [`insert`](Db::insert),
304/// [`get`](Db::get), [`update`](Db::update), and [`delete`](Db::delete)
305/// documents by id. Reads take `&self` and writes take `&mut self`, so the
306/// compiler enforces single-writer access; share a `Db` across threads by
307/// placing it behind your own lock. Call [`flush`](Db::flush) to make recent
308/// writes durable.
309///
310/// # Examples
311///
312/// ```
313/// # fn main() -> bison_db::Result<()> {
314/// use bison_db::{Db, Document};
315///
316/// let dir = std::env::temp_dir().join("bison_db_doc_example");
317/// let _ = std::fs::remove_file(&dir);
318/// let mut db = Db::open(&dir)?;
319///
320/// let mut user = Document::new();
321/// user.set("name", "grace").set("born", 1906_i64);
322/// let id = db.insert(user)?;
323///
324/// let fetched = db.get(id)?.expect("just inserted");
325/// assert_eq!(fetched.get("name").and_then(|v| v.as_str()), Some("grace"));
326///
327/// db.flush()?;
328/// # let _ = std::fs::remove_file(&dir);
329/// # Ok(())
330/// # }
331/// ```
pub struct Db {
    /// The open store file, used for both positional reads and tail appends.
    /// Opened read+write, created if absent, never truncated on open.
    file: File,
    /// Path the store was opened from, returned by [`Db::path`].
    path: PathBuf,
    /// Live document id to the location of its most recent body.
    index: HashMap<u64, BodyLoc>,
    /// Offset at which the next record will be appended. Starts at
    /// `HEADER_LEN` for a fresh store and is what [`Db::stats`] reports as the
    /// file size.
    tail: u64,
    /// Id that the next [`Db::insert`] will assign. Starts at 1 and is advanced
    /// only after the insert's record has been appended successfully.
    next_id: u64,
    /// Reusable buffer for framing a record, so writes do not allocate.
    scratch: Vec<u8>,
    /// Secondary indexes by field name, built on demand and maintained on every
    /// write. Not persisted: rebuilt via [`Db::create_index`] each session.
    indexes: HashMap<String, SecondaryIndex>,
    /// When to force writes to disk with `fsync`.
    sync: SyncPolicy,
}
351
352impl Db {
353 /// Opens the store at `path`, creating an empty one if the file does not
354 /// exist, and replaying any existing records to rebuild the index.
355 ///
356 /// On open the whole log is scanned: each record's checksum is verified and
357 /// the in-memory index is reconstructed from the surviving inserts and
358 /// deletes. A record left half-written by a crash — detectable because it
359 /// runs past the end of the file or fails its checksum at the tail — is
360 /// truncated away, restoring the file to its last consistent state. A
361 /// checksum failure on a record that is *not* at the tail is reported as
362 /// [`Error::Corrupt`], because that indicates in-place damage rather than a
363 /// torn write.
364 ///
365 /// Uses [`SyncPolicy::Manual`]; for a different policy, open through
366 /// [`DbOptions`].
367 ///
368 /// # Errors
369 ///
370 /// Returns [`Error::Io`] if the file cannot be opened or read,
371 /// [`Error::BadMagic`] if an existing file is not a bison-db store,
372 /// [`Error::UnsupportedVersion`] if it was written by a newer format, and
373 /// [`Error::Corrupt`] if a non-tail record fails verification.
374 ///
375 /// # Examples
376 ///
377 /// ```
378 /// # fn main() -> bison_db::Result<()> {
379 /// let path = std::env::temp_dir().join("bison_db_open_example.bison");
380 /// let _ = std::fs::remove_file(&path);
381 /// let db = bison_db::Db::open(&path)?;
382 /// assert!(db.is_empty());
383 /// # let _ = std::fs::remove_file(&path);
384 /// # Ok(())
385 /// # }
386 /// ```
387 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
388 DbOptions::new().open(path)
389 }
390
391 /// Opens (or creates) the store at `path` with the given [`DbOptions`].
392 ///
393 /// A shorthand for [`DbOptions::open`]; see [`Db::open`] for the open and
394 /// recovery contract.
395 ///
396 /// # Errors
397 ///
398 /// Same as [`Db::open`].
399 ///
400 /// # Examples
401 ///
402 /// ```
403 /// # fn main() -> bison_db::Result<()> {
404 /// use bison_db::{Db, DbOptions, SyncPolicy};
405 /// # let path = std::env::temp_dir().join("bison_db_open_with_example.bison");
406 /// # let _ = std::fs::remove_file(&path);
407 /// let db = Db::open_with(&path, DbOptions::new().sync(SyncPolicy::Always))?;
408 /// assert_eq!(db.sync_policy(), SyncPolicy::Always);
409 /// # drop(db);
410 /// # let _ = std::fs::remove_file(&path);
411 /// # Ok(())
412 /// # }
413 /// ```
414 pub fn open_with<P: AsRef<Path>>(path: P, options: DbOptions) -> Result<Self> {
415 options.open(path)
416 }
417
418 /// The shared open path used by [`Db::open`] and [`DbOptions::open`].
419 fn open_inner(path: PathBuf, sync: SyncPolicy) -> Result<Self> {
420 let file = OpenOptions::new()
421 .read(true)
422 .write(true)
423 .create(true)
424 .truncate(false)
425 .open(&path)?;
426 let file_len = file.metadata()?.len();
427
428 let mut db = Db {
429 file,
430 path,
431 index: HashMap::new(),
432 tail: HEADER_LEN,
433 next_id: 1,
434 scratch: Vec::with_capacity(256),
435 indexes: HashMap::new(),
436 sync,
437 };
438
439 if file_len == 0 {
440 db.write_header()?;
441 // Make the newly created file's directory entry durable, so the file
442 // is guaranteed to exist after a crash that follows creation.
443 sync_parent_dir(&db.path)?;
444 } else {
445 db.verify_header(file_len)?;
446 db.replay(file_len)?;
447 }
448 Ok(db)
449 }
450
451 /// Returns the store's [`SyncPolicy`].
452 ///
453 /// # Examples
454 ///
455 /// ```
456 /// # fn main() -> bison_db::Result<()> {
457 /// use bison_db::{Db, SyncPolicy};
458 /// # let path = std::env::temp_dir().join("bison_db_syncpolicy_getter.bison");
459 /// # let _ = std::fs::remove_file(&path);
460 /// let db = Db::open(&path)?;
461 /// assert_eq!(db.sync_policy(), SyncPolicy::Manual);
462 /// # drop(db);
463 /// # let _ = std::fs::remove_file(&path);
464 /// # Ok(())
465 /// # }
466 /// ```
467 #[must_use]
468 pub fn sync_policy(&self) -> SyncPolicy {
469 self.sync
470 }
471
472 /// Inserts `doc`, assigning and returning a fresh [`DocId`].
473 ///
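    /// The document is appended to the log and indexed; it is readable
    /// immediately. Under [`SyncPolicy::Manual`] it becomes durable at the next
    /// [`flush`](Db::flush); under [`SyncPolicy::Always`] it is durable as soon
    /// as this call returns.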
476 ///
477 /// # Errors
478 ///
479 /// Returns [`Error::ValueTooLarge`] if the encoded document exceeds
480 /// [`MAX_RECORD_BYTES`], or [`Error::Io`] if the append fails.
481 ///
482 /// # Examples
483 ///
484 /// ```
485 /// # fn main() -> bison_db::Result<()> {
486 /// # let path = std::env::temp_dir().join("bison_db_insert_example.bison");
487 /// # let _ = std::fs::remove_file(&path);
488 /// use bison_db::{Db, Document};
489 /// let mut db = Db::open(&path)?;
490 /// let mut doc = Document::new();
491 /// doc.set("k", "v");
492 /// let id = db.insert(doc)?;
493 /// assert!(db.contains(id));
494 /// # let _ = std::fs::remove_file(&path);
495 /// # Ok(())
496 /// # }
497 /// ```
498 pub fn insert(&mut self, doc: Document) -> Result<DocId> {
499 let id = self.next_id;
500 self.append(OP_PUT, id, Some(&doc))?;
501 self.next_id = id + 1;
502 self.index_add(id, &doc);
503 Ok(DocId(id))
504 }
505
506 /// Reads the document stored under `id`, or `None` if no live document has
507 /// that id.
508 ///
509 /// # Errors
510 ///
511 /// Returns [`Error::Io`] if the body cannot be read, or [`Error::Corrupt`]
512 /// if the stored bytes fail to decode (which a passing checksum makes
513 /// unexpected in practice).
514 ///
515 /// # Examples
516 ///
517 /// ```
518 /// # fn main() -> bison_db::Result<()> {
519 /// # let path = std::env::temp_dir().join("bison_db_get_example.bison");
520 /// # let _ = std::fs::remove_file(&path);
521 /// use bison_db::{Db, Document, DocId};
522 /// let mut db = Db::open(&path)?;
523 /// let id = db.insert({ let mut d = Document::new(); d.set("n", 1_i64); d })?;
524 /// assert!(db.get(id)?.is_some());
525 /// assert!(db.get(DocId::from(9999))?.is_none());
526 /// # let _ = std::fs::remove_file(&path);
527 /// # Ok(())
528 /// # }
529 /// ```
530 pub fn get(&self, id: DocId) -> Result<Option<Document>> {
531 match self.index.get(&id.0).copied() {
532 Some(loc) => self.read_body(loc).map(Some),
533 None => Ok(None),
534 }
535 }
536
537 /// Overwrites the document stored under `id` with `doc`, returning `true` if
538 /// a document was present to overwrite and `false` otherwise.
539 ///
540 /// A successful update appends a new record and repoints the index; the
541 /// previous body remains in the file as dead space until compaction.
542 ///
543 /// # Errors
544 ///
545 /// Returns [`Error::ValueTooLarge`] or [`Error::Io`] under the same
546 /// conditions as [`insert`](Db::insert).
547 ///
548 /// # Examples
549 ///
550 /// ```
551 /// # fn main() -> bison_db::Result<()> {
552 /// # let path = std::env::temp_dir().join("bison_db_update_example.bison");
553 /// # let _ = std::fs::remove_file(&path);
554 /// use bison_db::{Db, Document, DocId};
555 /// let mut db = Db::open(&path)?;
556 /// let id = db.insert({ let mut d = Document::new(); d.set("v", 1_i64); d })?;
557 ///
558 /// let mut next = Document::new();
559 /// next.set("v", 2_i64);
560 /// assert!(db.update(id, next)?);
561 /// assert!(!db.update(DocId::from(404), Document::new())?);
562 /// # let _ = std::fs::remove_file(&path);
563 /// # Ok(())
564 /// # }
565 /// ```
566 pub fn update(&mut self, id: DocId, doc: Document) -> Result<bool> {
567 let Some(loc) = self.index.get(&id.0).copied() else {
568 return Ok(false);
569 };
570 if !self.indexes.is_empty() {
571 let old = self.read_body(loc)?;
572 self.index_remove(id.0, &old);
573 }
574 self.append(OP_PUT, id.0, Some(&doc))?;
575 self.index_add(id.0, &doc);
576 Ok(true)
577 }
578
579 /// Deletes the document stored under `id`, returning `true` if one was
580 /// present and `false` otherwise.
581 ///
582 /// A tombstone is appended so the deletion survives reopening; the document
583 /// is unreadable as soon as this returns.
584 ///
585 /// # Errors
586 ///
587 /// Returns [`Error::Io`] if the tombstone cannot be appended.
588 ///
589 /// # Examples
590 ///
591 /// ```
592 /// # fn main() -> bison_db::Result<()> {
593 /// # let path = std::env::temp_dir().join("bison_db_delete_example.bison");
594 /// # let _ = std::fs::remove_file(&path);
595 /// use bison_db::{Db, Document};
596 /// let mut db = Db::open(&path)?;
597 /// let id = db.insert({ let mut d = Document::new(); d.set("x", 1_i64); d })?;
598 /// assert!(db.delete(id)?);
599 /// assert!(db.get(id)?.is_none());
600 /// assert!(!db.delete(id)?);
601 /// # let _ = std::fs::remove_file(&path);
602 /// # Ok(())
603 /// # }
604 /// ```
605 pub fn delete(&mut self, id: DocId) -> Result<bool> {
606 let Some(loc) = self.index.get(&id.0).copied() else {
607 return Ok(false);
608 };
609 if !self.indexes.is_empty() {
610 let old = self.read_body(loc)?;
611 self.index_remove(id.0, &old);
612 }
613 self.append(OP_DELETE, id.0, None)?;
614 Ok(true)
615 }
616
617 /// Returns `true` if a live document has this `id`.
618 ///
619 /// This is an in-memory index lookup with no file access.
620 ///
621 /// # Examples
622 ///
623 /// ```
624 /// # fn main() -> bison_db::Result<()> {
625 /// # let path = std::env::temp_dir().join("bison_db_contains_example.bison");
626 /// # let _ = std::fs::remove_file(&path);
627 /// use bison_db::{Db, Document};
628 /// let mut db = Db::open(&path)?;
629 /// let id = db.insert(Document::new())?;
630 /// assert!(db.contains(id));
631 /// # let _ = std::fs::remove_file(&path);
632 /// # Ok(())
633 /// # }
634 /// ```
635 #[must_use]
636 pub fn contains(&self, id: DocId) -> bool {
637 self.index.contains_key(&id.0)
638 }
639
640 /// Returns the number of live documents.
641 ///
642 /// # Examples
643 ///
644 /// ```
645 /// # fn main() -> bison_db::Result<()> {
646 /// # let path = std::env::temp_dir().join("bison_db_len_example.bison");
647 /// # let _ = std::fs::remove_file(&path);
648 /// use bison_db::{Db, Document};
649 /// let mut db = Db::open(&path)?;
650 /// db.insert(Document::new())?;
651 /// assert_eq!(db.len(), 1);
652 /// # let _ = std::fs::remove_file(&path);
653 /// # Ok(())
654 /// # }
655 /// ```
656 #[must_use]
657 pub fn len(&self) -> usize {
658 self.index.len()
659 }
660
661 /// Returns `true` if the store holds no live documents.
662 ///
663 /// # Examples
664 ///
665 /// ```
666 /// # fn main() -> bison_db::Result<()> {
667 /// # let path = std::env::temp_dir().join("bison_db_isempty_example.bison");
668 /// # let _ = std::fs::remove_file(&path);
669 /// let db = bison_db::Db::open(&path)?;
670 /// assert!(db.is_empty());
671 /// # let _ = std::fs::remove_file(&path);
672 /// # Ok(())
673 /// # }
674 /// ```
675 #[must_use]
676 pub fn is_empty(&self) -> bool {
677 self.index.is_empty()
678 }
679
680 /// Returns an iterator over the ids of all live documents.
681 ///
682 /// The order is unspecified and may change between runs; collect and sort if
683 /// you need a stable order.
684 ///
685 /// # Examples
686 ///
687 /// ```
688 /// # fn main() -> bison_db::Result<()> {
689 /// # let path = std::env::temp_dir().join("bison_db_ids_example.bison");
690 /// # let _ = std::fs::remove_file(&path);
691 /// use bison_db::{Db, Document};
692 /// let mut db = Db::open(&path)?;
693 /// db.insert(Document::new())?;
694 /// db.insert(Document::new())?;
695 /// assert_eq!(db.ids().count(), 2);
696 /// # let _ = std::fs::remove_file(&path);
697 /// # Ok(())
698 /// # }
699 /// ```
700 pub fn ids(&self) -> impl Iterator<Item = DocId> + '_ {
701 self.index.keys().copied().map(DocId)
702 }
703
    /// Forces every write made so far through the OS cache to the device with
    /// `fsync`, making those writes durable against power loss. The store keeps
    /// no userspace write buffer, so there is nothing else to flush.
706 ///
707 /// # Errors
708 ///
709 /// Returns [`Error::Io`] if the sync fails.
710 ///
711 /// # Examples
712 ///
713 /// ```
714 /// # fn main() -> bison_db::Result<()> {
715 /// # let path = std::env::temp_dir().join("bison_db_flush_example.bison");
716 /// # let _ = std::fs::remove_file(&path);
717 /// use bison_db::{Db, Document};
718 /// let mut db = Db::open(&path)?;
719 /// db.insert(Document::new())?;
720 /// db.flush()?;
721 /// # let _ = std::fs::remove_file(&path);
722 /// # Ok(())
723 /// # }
724 /// ```
725 pub fn flush(&mut self) -> Result<()> {
726 self.file.sync_all()?;
727 Ok(())
728 }
729
730 /// Returns the path the store was opened from.
731 ///
732 /// # Examples
733 ///
734 /// ```
735 /// # fn main() -> bison_db::Result<()> {
736 /// # let path = std::env::temp_dir().join("bison_db_path_example.bison");
737 /// # let _ = std::fs::remove_file(&path);
738 /// let db = bison_db::Db::open(&path)?;
739 /// assert_eq!(db.path(), path.as_path());
740 /// # let _ = std::fs::remove_file(&path);
741 /// # Ok(())
742 /// # }
743 /// ```
744 #[must_use]
745 pub fn path(&self) -> &Path {
746 &self.path
747 }
748
749 /// Returns a [`Stats`] snapshot of the store's size and live contents.
750 ///
751 /// # Examples
752 ///
753 /// ```
754 /// # fn main() -> bison_db::Result<()> {
755 /// # let path = std::env::temp_dir().join("bison_db_stats_example.bison");
756 /// # let _ = std::fs::remove_file(&path);
757 /// use bison_db::{Db, Document};
758 /// let mut db = Db::open(&path)?;
759 /// db.insert(Document::new())?;
760 /// assert_eq!(db.stats().live_documents, 1);
761 /// # let _ = std::fs::remove_file(&path);
762 /// # Ok(())
763 /// # }
764 /// ```
765 #[must_use]
766 pub fn stats(&self) -> Stats {
767 let live_bytes = self.index.values().map(|loc| u64::from(loc.len)).sum();
768 Stats {
769 live_documents: self.index.len(),
770 file_bytes: self.tail,
771 live_bytes,
772 }
773 }
774
775 /// Builds a secondary index over `field`, making [`find`](Db::find) and
776 /// [`range`](Db::range) on that field fast point and range lookups instead of
777 /// full scans.
778 ///
779 /// The index is built by reading every live document once and recording its
780 /// value for `field`; documents without the field are skipped. From then on,
781 /// it is maintained automatically on every insert, update, and delete. Any
782 /// number of fields may be indexed — call this once per field.
783 ///
784 /// Indexes live in memory only and are **not** persisted: after reopening a
785 /// store, call this again for each field you want indexed. Calling it for a
786 /// field that is already indexed is a no-op.
787 ///
788 /// # Errors
789 ///
790 /// Returns [`Error::Io`] or [`Error::Corrupt`] if a document cannot be read
791 /// while building the index.
792 ///
793 /// # Examples
794 ///
795 /// ```
796 /// # fn main() -> bison_db::Result<()> {
797 /// # let path = std::env::temp_dir().join("bison_db_createindex_example.bison");
798 /// # let _ = std::fs::remove_file(&path);
799 /// use bison_db::{Db, Document, Value};
800 /// let mut db = Db::open(&path)?;
801 /// db.insert({ let mut d = Document::new(); d.set("city", "Oslo"); d })?;
802 ///
803 /// db.create_index("city")?;
804 /// let hits = db.find("city", &Value::from("Oslo"))?;
805 /// assert_eq!(hits.len(), 1);
806 /// # let _ = std::fs::remove_file(&path);
807 /// # Ok(())
808 /// # }
809 /// ```
810 pub fn create_index(&mut self, field: &str) -> Result<()> {
811 if self.indexes.contains_key(field) {
812 return Ok(());
813 }
814 let mut index = SecondaryIndex::new();
815 let entries: Vec<(u64, BodyLoc)> = self.index.iter().map(|(id, loc)| (*id, *loc)).collect();
816 for (id, loc) in entries {
817 let doc = self.read_body(loc)?;
818 if let Some(value) = doc.get(field) {
819 index.add(value, id);
820 }
821 }
822 let _ = self.indexes.insert(field.to_string(), index);
823 Ok(())
824 }
825
826 /// Drops the secondary index over `field`, returning `true` if one existed.
827 ///
828 /// # Examples
829 ///
830 /// ```
831 /// # fn main() -> bison_db::Result<()> {
832 /// # let path = std::env::temp_dir().join("bison_db_dropindex_example.bison");
833 /// # let _ = std::fs::remove_file(&path);
834 /// let mut db = bison_db::Db::open(&path)?;
835 /// db.create_index("name")?;
836 /// assert!(db.drop_index("name"));
837 /// assert!(!db.drop_index("name"));
838 /// # let _ = std::fs::remove_file(&path);
839 /// # Ok(())
840 /// # }
841 /// ```
842 pub fn drop_index(&mut self, field: &str) -> bool {
843 self.indexes.remove(field).is_some()
844 }
845
846 /// Returns an iterator over the names of the currently indexed fields.
847 ///
848 /// The order is unspecified.
849 ///
850 /// # Examples
851 ///
852 /// ```
853 /// # fn main() -> bison_db::Result<()> {
854 /// # let path = std::env::temp_dir().join("bison_db_indexes_example.bison");
855 /// # let _ = std::fs::remove_file(&path);
856 /// let mut db = bison_db::Db::open(&path)?;
857 /// db.create_index("a")?;
858 /// db.create_index("b")?;
859 /// assert_eq!(db.indexes().count(), 2);
860 /// # let _ = std::fs::remove_file(&path);
861 /// # Ok(())
862 /// # }
863 /// ```
864 pub fn indexes(&self) -> impl Iterator<Item = &str> {
865 self.indexes.keys().map(String::as_str)
866 }
867
868 /// Returns the ids of all live documents whose `field` equals `value`.
869 ///
870 /// If `field` is indexed (see [`create_index`](Db::create_index)) this is a
871 /// point lookup; otherwise it falls back to scanning every live document, so
872 /// the result is correct either way — the index only changes the speed.
873 /// Equality follows the same total order the indexes use, so a `Float` field
874 /// distinguishes `0.0` from `-0.0`.
875 ///
876 /// # Errors
877 ///
878 /// Returns [`Error::Io`] or [`Error::Corrupt`] if a document must be read
879 /// (the unindexed path) and cannot be.
880 ///
881 /// # Examples
882 ///
883 /// ```
884 /// # fn main() -> bison_db::Result<()> {
885 /// # let path = std::env::temp_dir().join("bison_db_find_example.bison");
886 /// # let _ = std::fs::remove_file(&path);
887 /// use bison_db::{Db, Document, Value};
888 /// let mut db = Db::open(&path)?;
889 /// db.insert({ let mut d = Document::new(); d.set("role", "admin"); d })?;
890 /// db.insert({ let mut d = Document::new(); d.set("role", "user"); d })?;
891 /// db.create_index("role")?;
892 ///
893 /// assert_eq!(db.find("role", &Value::from("admin"))?.len(), 1);
894 /// assert!(db.find("role", &Value::from("ghost"))?.is_empty());
895 /// # let _ = std::fs::remove_file(&path);
896 /// # Ok(())
897 /// # }
898 /// ```
899 pub fn find(&self, field: &str, value: &Value) -> Result<Vec<DocId>> {
900 if let Some(index) = self.indexes.get(field) {
901 return Ok(index.equal(value).into_iter().map(DocId).collect());
902 }
903 let mut out = Vec::new();
904 for (id, loc) in &self.index {
905 let doc = self.read_body(*loc)?;
906 if doc
907 .get(field)
908 .is_some_and(|v| total_cmp_value(v, value) == core::cmp::Ordering::Equal)
909 {
910 out.push(DocId(*id));
911 }
912 }
913 Ok(out)
914 }
915
916 /// Returns the ids of all live documents whose `field` falls within `range`.
917 ///
918 /// Bounds are [`Value`]s compared with the same total order the indexes use;
919 /// any [`RangeBounds`] form works (`a..b`, `a..=b`, `..b`, `a..`, `..`).
    /// If `field` is indexed the matches come back ordered by field value (then
    /// id); otherwise the store scans every live document and returns the
    /// matches in unspecified order. As with [`find`](Db::find), the index
    /// changes the speed and the ordering, never which ids are returned.
923 ///
924 /// # Errors
925 ///
926 /// Returns [`Error::Io`] or [`Error::Corrupt`] if a document must be read
927 /// (the unindexed path) and cannot be.
928 ///
929 /// # Examples
930 ///
931 /// ```
932 /// # fn main() -> bison_db::Result<()> {
933 /// # let path = std::env::temp_dir().join("bison_db_range_example.bison");
934 /// # let _ = std::fs::remove_file(&path);
935 /// use bison_db::{Db, Document, Value};
936 /// let mut db = Db::open(&path)?;
937 /// for age in [17_i64, 25, 40, 70] {
938 /// db.insert({ let mut d = Document::new(); d.set("age", age); d })?;
939 /// }
940 /// db.create_index("age")?;
941 ///
942 /// // Working-age adults: 18..=65.
943 /// let hits = db.range("age", Value::from(18_i64)..=Value::from(65_i64))?;
944 /// assert_eq!(hits.len(), 2); // 25 and 40
945 /// # let _ = std::fs::remove_file(&path);
946 /// # Ok(())
947 /// # }
948 /// ```
949 pub fn range<R: RangeBounds<Value>>(&self, field: &str, range: R) -> Result<Vec<DocId>> {
950 let lo = range.start_bound();
951 let hi = range.end_bound();
952 if let Some(index) = self.indexes.get(field) {
953 return Ok(index.range(lo, hi).into_iter().map(DocId).collect());
954 }
955 let mut out = Vec::new();
956 for (id, loc) in &self.index {
957 let doc = self.read_body(*loc)?;
958 if doc.get(field).is_some_and(|v| in_bounds(v, lo, hi)) {
959 out.push(DocId(*id));
960 }
961 }
962 Ok(out)
963 }
964
965 /// Reads and decodes the document body at `loc`.
966 fn read_body(&self, loc: BodyLoc) -> Result<Document> {
967 let mut buf = vec![0u8; loc.len as usize];
968 read_exact_at(&self.file, &mut buf, loc.offset)?;
969 decode_document(&buf)
970 }
971
972 /// Adds document `id`'s indexed field values to every secondary index.
973 fn index_add(&mut self, id: u64, doc: &Document) {
974 for (field, index) in &mut self.indexes {
975 if let Some(value) = doc.get(field) {
976 index.add(value, id);
977 }
978 }
979 }
980
981 /// Removes document `id`'s indexed field values from every secondary index.
982 fn index_remove(&mut self, id: u64, doc: &Document) {
983 for (field, index) in &mut self.indexes {
984 if let Some(value) = doc.get(field) {
985 index.remove(value, id);
986 }
987 }
988 }
989
990 /// Appends one framed record and updates the index accordingly.
991 ///
992 /// For [`OP_PUT`] the body is encoded and the index repointed at it; for
993 /// [`OP_DELETE`] the index entry is removed. The frame is built in `scratch`
994 /// so the steady-state write path performs no per-record allocation.
995 fn append(&mut self, op: u8, id: u64, doc: Option<&Document>) -> Result<()> {
996 self.scratch.clear();
997 // Reserve the frame header; the length and checksum are backfilled once
998 // the payload is known.
999 self.scratch.extend_from_slice(&[0u8; FRAME_LEN]);
1000 self.scratch.push(op);
1001 self.scratch.extend_from_slice(&id.to_le_bytes());
1002 if let Some(doc) = doc {
1003 encode_document_into(&mut self.scratch, doc)?;
1004 }
1005
1006 let payload_len = self.scratch.len() - FRAME_LEN;
1007 if payload_len > MAX_RECORD_BYTES {
1008 return Err(Error::ValueTooLarge);
1009 }
1010 let crc = crc32c(&self.scratch[FRAME_LEN..]);
1011 self.scratch[0..4].copy_from_slice(&(payload_len as u32).to_le_bytes());
1012 self.scratch[4..8].copy_from_slice(&crc.to_le_bytes());
1013
1014 write_all_at(&self.file, &self.scratch, self.tail)?;
1015
1016 let record_start = self.tail;
1017 self.tail += (FRAME_LEN + payload_len) as u64;
1018
1019 match op {
1020 OP_PUT => {
1021 let offset = record_start + FRAME_LEN as u64 + MIN_PAYLOAD as u64;
1022 let len = (payload_len - MIN_PAYLOAD) as u32;
1023 let _ = self.index.insert(id, BodyLoc { offset, len });
1024 }
1025 OP_DELETE => {
1026 let _ = self.index.remove(&id);
1027 }
1028 _ => {}
1029 }
1030
1031 if self.sync == SyncPolicy::Always {
1032 self.file.sync_all()?;
1033 }
1034 Ok(())
1035 }
1036
1037 /// Writes the 16-byte file header at offset 0 and syncs it, establishing a
1038 /// valid empty store.
1039 fn write_header(&mut self) -> Result<()> {
1040 let mut header = [0u8; HEADER_LEN as usize];
1041 header[0..8].copy_from_slice(&HEADER_MAGIC);
1042 header[8..10].copy_from_slice(&FORMAT_VERSION.to_le_bytes());
1043 write_all_at(&self.file, &header, 0)?;
1044 self.file.sync_all()?;
1045 Ok(())
1046 }
1047
1048 /// Validates the header of an existing file: length, magic, and version.
1049 fn verify_header(&self, file_len: u64) -> Result<()> {
1050 if file_len < HEADER_LEN {
1051 return Err(Error::BadMagic);
1052 }
1053 let mut header = [0u8; HEADER_LEN as usize];
1054 read_exact_at(&self.file, &mut header, 0)?;
1055 if header[0..8] != HEADER_MAGIC {
1056 return Err(Error::BadMagic);
1057 }
1058 let version = u16::from_le_bytes([header[8], header[9]]);
1059 if version > FORMAT_VERSION {
1060 return Err(Error::UnsupportedVersion(version));
1061 }
1062 Ok(())
1063 }
1064
1065 /// Scans every record after the header, rebuilding the index and truncating
1066 /// a torn record at the tail if one is found.
1067 fn replay(&mut self, file_len: u64) -> Result<()> {
1068 let mut offset = HEADER_LEN;
1069 let mut frame = [0u8; FRAME_LEN];
1070
1071 loop {
1072 if offset + FRAME_LEN as u64 > file_len {
1073 break;
1074 }
1075 read_exact_at(&self.file, &mut frame, offset)?;
1076 let payload_len = u32::from_le_bytes([frame[0], frame[1], frame[2], frame[3]]) as usize;
1077 let expected_crc = u32::from_le_bytes([frame[4], frame[5], frame[6], frame[7]]);
1078
1079 if !(MIN_PAYLOAD..=MAX_RECORD_BYTES).contains(&payload_len) {
1080 // A length this size at the tail is an incomplete write; mid-file
1081 // it is corruption. Either way the run of valid records ends here.
1082 break;
1083 }
1084 let record_end = offset + FRAME_LEN as u64 + payload_len as u64;
1085 if record_end > file_len {
1086 break;
1087 }
1088
1089 let mut payload = vec![0u8; payload_len];
1090 read_exact_at(&self.file, &mut payload, offset + FRAME_LEN as u64)?;
1091 if crc32c(&payload) != expected_crc {
1092 if record_end == file_len {
1093 // Torn final record: drop it and stop.
1094 break;
1095 }
1096 return Err(Error::Corrupt("crc mismatch"));
1097 }
1098
1099 let op = payload[0];
1100 let id = u64::from_le_bytes([
1101 payload[1], payload[2], payload[3], payload[4], payload[5], payload[6], payload[7],
1102 payload[8],
1103 ]);
1104
1105 match op {
1106 OP_PUT => {
1107 let offset = offset + FRAME_LEN as u64 + MIN_PAYLOAD as u64;
1108 let len = (payload_len - MIN_PAYLOAD) as u32;
1109 let _ = self.index.insert(id, BodyLoc { offset, len });
1110 }
1111 OP_DELETE => {
1112 let _ = self.index.remove(&id);
1113 }
1114 _ => return Err(Error::Corrupt("unknown record op")),
1115 }
1116 if id >= self.next_id {
1117 self.next_id = id + 1;
1118 }
1119 offset = record_end;
1120 }
1121
1122 if offset < file_len {
1123 // Trailing torn bytes: cut the file back to the last good record.
1124 self.file.set_len(offset)?;
1125 }
1126 self.tail = offset;
1127 Ok(())
1128 }
1129}
1130
1131impl fmt::Debug for Db {
1132 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1133 f.debug_struct("Db")
1134 .field("path", &self.path)
1135 .field("live_documents", &self.index.len())
1136 .field("file_bytes", &self.tail)
1137 .field("sync", &self.sync)
1138 .finish()
1139 }
1140}
1141
1142impl Drop for Db {
1143 /// Makes a best-effort `fsync` on a clean shutdown under
1144 /// [`SyncPolicy::Manual`], so a normal program exit does not lose writes that
1145 /// were never explicitly flushed. Under [`SyncPolicy::Always`] every write is
1146 /// already durable, so nothing is done. Any error here is ignored because a
1147 /// destructor cannot return one; call [`Db::flush`] before dropping when you
1148 /// need to observe a sync failure.
1149 fn drop(&mut self) {
1150 if self.sync == SyncPolicy::Manual {
1151 let _ = self.file.sync_all();
1152 }
1153 }
1154}
1155
1156/// Forces the directory containing `path` to disk, so the file's creation is
1157/// durable. On Unix this is a real `fsync` of the parent directory; on Windows,
1158/// directory handles do not support this and file-level `fsync` already persists
1159/// the entry, so this is a documented no-op.
1160#[cfg(unix)]
1161fn sync_parent_dir(path: &Path) -> Result<()> {
1162 let parent = path.parent().filter(|p| !p.as_os_str().is_empty());
1163 let dir = parent.unwrap_or_else(|| Path::new("."));
1164 let handle = File::open(dir)?;
1165 handle.sync_all()?;
1166 Ok(())
1167}
1168
/// Windows counterpart to [`sync_parent_dir`]: a no-op, because the file-level
/// `fsync` already makes the directory entry durable on this platform.
///
/// `_path` is accepted only so the signature matches the Unix version; it is
/// never read, and the function always returns `Ok(())`.
#[cfg(windows)]
fn sync_parent_dir(_path: &Path) -> Result<()> {
    Ok(())
}
1175
#[cfg(test)]
// Tests unwrap freely: a failed unwrap is exactly the test failure we want.
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use crate::value::Value;
    use std::sync::atomic::{AtomicU64, Ordering};

    /// A unique temp-file path that is deleted again when the guard drops.
    ///
    /// Cleanup lives in `Drop` so it also runs while a failed assertion
    /// unwinds; a trailing `remove_file` call at the end of each test would be
    /// skipped in that case and strand the file in the temp directory. Declare
    /// the guard before the `Db` so the store is dropped (and synced) first.
    struct TempFile {
        path: PathBuf,
    }

    impl Drop for TempFile {
        fn drop(&mut self) {
            let _ = std::fs::remove_file(&self.path);
        }
    }

    /// Returns a guard for a unique temp path and removes any stale file at it.
    fn temp_path() -> TempFile {
        static COUNTER: AtomicU64 = AtomicU64::new(0);
        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
        let pid = std::process::id();
        let path = std::env::temp_dir().join(format!("bison_db_test_{pid}_{n}.bison"));
        let _ = std::fs::remove_file(&path);
        TempFile { path }
    }

    /// Builds a document from `(field, integer)` pairs.
    fn doc(pairs: &[(&str, i64)]) -> Document {
        let mut d = Document::new();
        for (k, v) in pairs {
            d.set(*k, *v);
        }
        d
    }

    #[test]
    fn test_insert_get_roundtrip() {
        let tmp = temp_path();
        let mut db = Db::open(&tmp.path).unwrap();
        let id = db.insert(doc(&[("a", 1), ("b", 2)])).unwrap();
        let got = db.get(id).unwrap().unwrap();
        assert_eq!(got.get("a").and_then(Value::as_int), Some(1));
    }

    #[test]
    fn test_get_missing_returns_none() {
        let tmp = temp_path();
        let db = Db::open(&tmp.path).unwrap();
        assert!(db.get(DocId::from(1)).unwrap().is_none());
    }

    #[test]
    fn test_delete_removes_document() {
        let tmp = temp_path();
        let mut db = Db::open(&tmp.path).unwrap();
        let id = db.insert(doc(&[("x", 9)])).unwrap();
        assert!(db.delete(id).unwrap());
        assert!(db.get(id).unwrap().is_none());
        assert!(!db.delete(id).unwrap());
    }

    #[test]
    fn test_update_changes_value() {
        let tmp = temp_path();
        let mut db = Db::open(&tmp.path).unwrap();
        let id = db.insert(doc(&[("v", 1)])).unwrap();
        assert!(db.update(id, doc(&[("v", 2)])).unwrap());
        assert_eq!(
            db.get(id)
                .unwrap()
                .unwrap()
                .get("v")
                .and_then(Value::as_int),
            Some(2)
        );
    }

    #[test]
    fn test_update_absent_id_is_false() {
        let tmp = temp_path();
        let mut db = Db::open(&tmp.path).unwrap();
        assert!(!db.update(DocId::from(7), Document::new()).unwrap());
    }

    #[test]
    fn test_reopen_recovers_state() {
        let tmp = temp_path();
        let (a, b);
        {
            let mut db = Db::open(&tmp.path).unwrap();
            a = db.insert(doc(&[("n", 10)])).unwrap();
            b = db.insert(doc(&[("n", 20)])).unwrap();
            db.delete(a).unwrap();
            db.flush().unwrap();
        }
        let db = Db::open(&tmp.path).unwrap();
        assert!(db.get(a).unwrap().is_none());
        assert_eq!(
            db.get(b).unwrap().unwrap().get("n").and_then(Value::as_int),
            Some(20)
        );
        assert_eq!(db.len(), 1);
    }

    #[test]
    fn test_reopen_continues_id_sequence() {
        let tmp = temp_path();
        let first;
        {
            let mut db = Db::open(&tmp.path).unwrap();
            first = db.insert(Document::new()).unwrap();
        }
        let mut db = Db::open(&tmp.path).unwrap();
        let second = db.insert(Document::new()).unwrap();
        assert!(second.get() > first.get());
    }

    #[test]
    fn test_open_rejects_foreign_file() {
        let tmp = temp_path();
        std::fs::write(&tmp.path, b"this is definitely not a bison-db file at all").unwrap();
        assert!(matches!(Db::open(&tmp.path), Err(Error::BadMagic)));
    }

    #[test]
    fn test_torn_tail_is_truncated_on_open() {
        let tmp = temp_path();
        let keep;
        {
            let mut db = Db::open(&tmp.path).unwrap();
            keep = db.insert(doc(&[("ok", 1)])).unwrap();
            db.flush().unwrap();
        }
        // Append a bogus frame claiming a payload longer than what follows.
        {
            use std::io::Write;
            let mut f = OpenOptions::new().append(true).open(&tmp.path).unwrap();
            let mut frame = Vec::new();
            frame.extend_from_slice(&999u32.to_le_bytes());
            frame.extend_from_slice(&0u32.to_le_bytes());
            frame.extend_from_slice(b"short");
            f.write_all(&frame).unwrap();
            f.flush().unwrap();
        }
        let db = Db::open(&tmp.path).unwrap();
        assert!(db.get(keep).unwrap().is_some());
        assert_eq!(db.len(), 1);
    }

    #[test]
    fn test_stats_reflect_live_documents() {
        let tmp = temp_path();
        let mut db = Db::open(&tmp.path).unwrap();
        db.insert(doc(&[("a", 1)])).unwrap();
        let id = db.insert(doc(&[("b", 2)])).unwrap();
        db.delete(id).unwrap();
        let stats = db.stats();
        assert_eq!(stats.live_documents, 1);
        assert!(stats.file_bytes > HEADER_LEN);
    }

    /// Sorts ids and unwraps them to raw integers, so result sets can be
    /// compared regardless of the order a query returned them in.
    fn sorted(mut ids: Vec<DocId>) -> Vec<u64> {
        ids.sort();
        ids.into_iter().map(DocId::get).collect()
    }

    #[test]
    fn test_create_index_then_find() {
        let tmp = temp_path();
        let mut db = Db::open(&tmp.path).unwrap();
        let a = db.insert(doc(&[("g", 1)])).unwrap();
        let b = db.insert(doc(&[("g", 2)])).unwrap();
        let c = db.insert(doc(&[("g", 1)])).unwrap();

        db.create_index("g").unwrap();
        assert_eq!(
            sorted(db.find("g", &Value::from(1_i64)).unwrap()),
            sorted(vec![a, c])
        );
        assert_eq!(
            sorted(db.find("g", &Value::from(2_i64)).unwrap()),
            vec![b.get()]
        );
        assert!(db.find("g", &Value::from(9_i64)).unwrap().is_empty());
    }

    #[test]
    fn test_find_indexed_matches_scan() {
        let tmp = temp_path();
        let mut db = Db::open(&tmp.path).unwrap();
        for n in [1, 2, 2, 3, 2] {
            db.insert(doc(&[("k", n)])).unwrap();
        }
        let scan = sorted(db.find("k", &Value::from(2_i64)).unwrap()); // no index yet
        db.create_index("k").unwrap();
        let indexed = sorted(db.find("k", &Value::from(2_i64)).unwrap());
        assert_eq!(scan, indexed);
        assert_eq!(scan.len(), 3);
    }

    #[test]
    fn test_range_query_inclusive_and_exclusive() {
        let tmp = temp_path();
        let mut db = Db::open(&tmp.path).unwrap();
        for n in [10, 20, 30, 40] {
            db.insert(doc(&[("age", n)])).unwrap();
        }
        db.create_index("age").unwrap();
        assert_eq!(
            db.range("age", Value::from(20_i64)..=Value::from(30_i64))
                .unwrap()
                .len(),
            2
        );
        assert_eq!(
            db.range("age", Value::from(20_i64)..Value::from(40_i64))
                .unwrap()
                .len(),
            2
        );
        assert_eq!(db.range("age", Value::from(25_i64)..).unwrap().len(), 2);
    }

    #[test]
    fn test_index_maintained_on_update_and_delete() {
        let tmp = temp_path();
        let mut db = Db::open(&tmp.path).unwrap();
        let id = db.insert(doc(&[("status", 1)])).unwrap();
        db.create_index("status").unwrap();
        assert_eq!(db.find("status", &Value::from(1_i64)).unwrap(), vec![id]);

        db.update(id, doc(&[("status", 2)])).unwrap();
        assert!(db.find("status", &Value::from(1_i64)).unwrap().is_empty());
        assert_eq!(db.find("status", &Value::from(2_i64)).unwrap(), vec![id]);

        db.delete(id).unwrap();
        assert!(db.find("status", &Value::from(2_i64)).unwrap().is_empty());
    }

    #[test]
    fn test_indexes_listed_and_dropped() {
        let tmp = temp_path();
        let mut db = Db::open(&tmp.path).unwrap();
        db.create_index("a").unwrap();
        db.create_index("b").unwrap();
        db.create_index("a").unwrap(); // idempotent
        let mut names: Vec<&str> = db.indexes().collect();
        names.sort_unstable();
        assert_eq!(names, ["a", "b"]);
        assert!(db.drop_index("a"));
        assert!(!db.drop_index("a"));
        assert_eq!(db.indexes().count(), 1);
    }

    #[test]
    fn test_index_not_persisted_but_rebuildable_after_reopen() {
        let tmp = temp_path();
        let id;
        {
            let mut db = Db::open(&tmp.path).unwrap();
            id = db.insert(doc(&[("city", 7)])).unwrap();
            db.create_index("city").unwrap();
            db.flush().unwrap();
        }
        let mut db = Db::open(&tmp.path).unwrap();
        assert_eq!(db.indexes().count(), 0); // indexes are not on disk
        db.create_index("city").unwrap(); // rebuild from the log
        assert_eq!(db.find("city", &Value::from(7_i64)).unwrap(), vec![id]);
    }

    #[test]
    fn test_default_sync_policy_is_manual() {
        let tmp = temp_path();
        let db = Db::open(&tmp.path).unwrap();
        assert_eq!(db.sync_policy(), SyncPolicy::Manual);
    }

    #[test]
    fn test_options_set_always_sync_policy() {
        let tmp = temp_path();
        let mut db =
            Db::open_with(&tmp.path, DbOptions::new().sync(SyncPolicy::Always)).unwrap();
        assert_eq!(db.sync_policy(), SyncPolicy::Always);
        // Every write fsyncs; data is present and reopen recovers it.
        let id = db.insert(doc(&[("v", 1)])).unwrap();
        assert!(db.get(id).unwrap().is_some());
        drop(db);
        let db = Db::open(&tmp.path).unwrap();
        assert_eq!(db.len(), 1);
    }

    #[test]
    fn test_always_sync_persists_without_explicit_flush() {
        let tmp = temp_path();
        let id;
        {
            let mut db =
                Db::open_with(&tmp.path, DbOptions::new().sync(SyncPolicy::Always)).unwrap();
            id = db.insert(doc(&[("durable", 1)])).unwrap();
            // No flush() call: Always already synced each write.
        }
        let db = Db::open(&tmp.path).unwrap();
        assert!(db.get(id).unwrap().is_some());
    }

    #[test]
    fn test_dboptions_open_matches_db_open() {
        let tmp = temp_path();
        let db = DbOptions::new().open(&tmp.path).unwrap();
        assert_eq!(db.sync_policy(), SyncPolicy::Manual);
    }
}