grebedb/
lib.rs

1//! Lightweight embedded key-value store/database backed by files
2//! in a virtual file system interface.
3//!
4//! To open a database, use [`Database`]:
5//!
6//! ```
7//! use grebedb::{Database, Options};
8//!
9//! # fn main() -> Result<(), grebedb::Error> {
10//! let options = Options::default();
11//! // let mut db = Database::open_memory("path/to/empty/directory/", options)?;
12//! let mut db = Database::open_memory(options)?;
13//!
14//! db.put("my_key", "hello world!")?;
15//! db.flush()?;
16//!
17//! # Ok(())
18//! # }
19//! ```
20//!
21//! For important details, such as limitations and guarantees, see the
22//! README.md file in the project's source code repository.
23
24#![forbid(unsafe_code)]
25#![warn(missing_docs)]
26
27pub mod error;
28pub mod export;
29mod format;
30mod lru;
31mod page;
32mod system;
33mod tree;
34pub mod vfs;
35
36use std::{
37    fmt::Debug,
38    ops::{Bound, RangeBounds},
39    path::{Path, PathBuf},
40    time::{Duration, Instant},
41};
42
43pub use crate::error::Error;
44use crate::format::Format;
45use crate::page::{Metadata as PageMetadata, Page, PageOpenMode, PageTableOptions};
46use crate::tree::{Node, Tree, TreeCursor, TreeMetadata};
47use crate::vfs::{MemoryVfs, OsVfs, ReadOnlyVfs, Vfs, VfsSyncOption};
48
49/// Type alias for an owned key-value pair.
50pub type KeyValuePair = (Vec<u8>, Vec<u8>);
51
52/// Database configuration options.
53#[derive(Debug, Clone)]
54pub struct Options {
55    /// Option when opening a database. Default: LoadOrCreate.
56    pub open_mode: OpenMode,
57
58    /// Maximum number of keys-value pairs per node. Default: 1024.
59    ///
60    /// This value specifies the threshold when a tree node is considered full.
61    /// When a node is full, it is split into two and the tree is rebalanced.
62    ///
63    /// A page contains a single node and a page is stored on disk as one file.
64    ///
65    /// This option shouldn't be changed without making performance and resource usage
66    /// benchmarks.
67    pub keys_per_node: usize,
68
69    /// Maximum number of pages held in memory cache. Default: 64.
70    ///
71    /// The cache is used to store frequently accessed pages for reducing disk operations.
72    ///
73    /// If memory usage is too high, consider decreasing this value first.
74    pub page_cache_size: usize,
75
76    /// Whether to use file locking to prevent corruption by multiple processes.
77    /// Default: true.
78    pub file_locking: bool,
79
80    /// Level of file synchronization to increase durability on disk file systems.
81    /// Default: Data
82    pub file_sync: SyncOption,
83
84    /// Whether to flush the data to the file system periodically when a
85    /// database operation is performed.
86    /// Default: true.
87    ///
88    /// When true, data is flushed when the database is dropped or when enough
89    /// modifications accumulate. Setting this option to false allows you to
90    /// manually persist changes at more optimal points.
91    ///
92    /// There is no background maintenance thread that does automatic flushing;
93    /// automatic flushing occurs when a database modifying function,
94    /// such as put() or remove(), is called.
95    pub automatic_flush: bool,
96
97    /// Number of modifications required for automatic flush to be considered.
98    /// Default: 2048
99    ///
100    /// When the threshold is reached after 300 seconds,
101    /// or the threshold × 2 is reached after 60 seconds,
102    /// a flush is scheduled to be performed on the next modification.
103    pub automatic_flush_threshold: usize,
104
105    /// Compression level for each page. Default: Low.
106    pub compression_level: CompressionLevel,
107}
108
109impl Default for Options {
110    fn default() -> Self {
111        Self {
112            open_mode: OpenMode::default(),
113            keys_per_node: 1024,
114            page_cache_size: 64,
115            file_locking: true,
116            file_sync: SyncOption::default(),
117            automatic_flush: true,
118            automatic_flush_threshold: 2048,
119            compression_level: CompressionLevel::default(),
120        }
121    }
122}
123
124impl Options {
125    fn validate(&self) -> Result<(), Error> {
126        if self.keys_per_node < 2 {
127            return Err(Error::InvalidConfig {
128                message: "required keys_per_node >= 2",
129            });
130        }
131        if self.page_cache_size < 1 {
132            return Err(Error::InvalidConfig {
133                message: "required page_cache_size >= 1",
134            });
135        }
136
137        Ok(())
138    }
139}
140
141impl From<Options> for PageTableOptions {
142    fn from(options: Options) -> Self {
143        Self {
144            open_mode: options.open_mode.into(),
145            page_cache_size: options.page_cache_size,
146            file_locking: options.file_locking,
147            file_sync: options.file_sync.into(),
148            keys_per_node: options.keys_per_node,
149            compression_level: options.compression_level.to_zstd(),
150        }
151    }
152}
153
154/// Database open modes.
155#[derive(Debug, Clone, Copy, PartialEq, Eq)]
156pub enum OpenMode {
157    /// Open an existing database only if it exists.
158    LoadOnly,
159    /// Create a database only if it does not already exist.
160    CreateOnly,
161    /// Open a database, creating it if it does not exist.
162    LoadOrCreate,
163    /// Open an existing database and avoid modifying it.
164    ReadOnly,
165}
166
167impl Default for OpenMode {
168    fn default() -> Self {
169        Self::LoadOrCreate
170    }
171}
172
173impl From<OpenMode> for PageOpenMode {
174    fn from(option: OpenMode) -> Self {
175        match option {
176            OpenMode::LoadOnly => PageOpenMode::LoadOnly,
177            OpenMode::CreateOnly => PageOpenMode::CreateOnly,
178            OpenMode::LoadOrCreate => PageOpenMode::LoadOrCreate,
179            OpenMode::ReadOnly => PageOpenMode::ReadOnly,
180        }
181    }
182}
183
184/// Database data compression level.
185#[derive(Debug, Clone, Copy, PartialEq, Eq)]
186pub enum CompressionLevel {
187    /// Disable compression.
188    None,
189
190    /// Very fast compression speeds at the expense of low compression ratios.
191    ///
192    /// Currently, this corresponds to Zstandard level 1.
193    VeryLow,
194
195    /// Fast compression speeds at the expense of somewhat low compression ratios.
196    ///
197    /// Currently, this corresponds to Zstandard level 3, the default value.
198    Low,
199
200    /// Higher compression ratios at the expense of slower compression speeds.
201    ///
202    /// Currently, this corresponds to Zstandard level 9.
203    Medium,
204
205    /// Best compression ratios at the expense of very slow compression speeds.
206    ///
207    /// Currently, this corresponds to Zstandard level 19.
208    High,
209}
210
211impl Default for CompressionLevel {
212    fn default() -> Self {
213        Self::Low
214    }
215}
216
217impl CompressionLevel {
218    fn to_zstd(&self) -> Option<i32> {
219        match self {
220            Self::None => None,
221            Self::VeryLow => Some(1),
222            Self::Low => Some(3),
223            Self::Medium => Some(9),
224            Self::High => Some(19),
225        }
226    }
227}
228
229/// Level of file synchronization for files created by the database.
230///
231/// These options are equivalent to [`vfs::VfsSyncOption`].
232#[derive(Debug, Clone, Copy, PartialEq, Eq)]
233pub enum SyncOption {
234    /// Don't require any flushing and simply overwrite files.
235    None,
236
237    /// Flush file content only and use file rename technique.
238    ///
239    /// Flush command is equivalent to `File::sync_data()` or Unix `fdatasync()`.
240    Data,
241
242    /// Flush file content including metadata and use file rename technique.
243    ///
244    /// Flush command is equivalent to `File::sync_all()` or Unix `fsync()`.
245    All,
246}
247
248impl Default for SyncOption {
249    fn default() -> Self {
250        Self::Data
251    }
252}
253
254impl From<SyncOption> for VfsSyncOption {
255    fn from(option: SyncOption) -> Self {
256        match option {
257            SyncOption::None => Self::None,
258            SyncOption::Data => Self::Data,
259            SyncOption::All => Self::All,
260        }
261    }
262}
263
264/// GrebeDB database interface.
265pub struct Database {
266    options: Options,
267    tree: Tree,
268    flush_tracker: Option<FlushTracker>,
269}
270
271impl Database {
272    /// Open a database using the given virtual file system and options.
273    pub fn open(vfs: Box<dyn Vfs + Sync + Send>, options: Options) -> Result<Self, Error> {
274        options.validate()?;
275
276        let vfs: Box<dyn Vfs + Sync + Send> = if options.open_mode == OpenMode::ReadOnly {
277            Box::new(ReadOnlyVfs::new(vfs))
278        } else {
279            vfs
280        };
281
282        let mut tree = Tree::open(vfs, options.clone().into())?;
283
284        match options.open_mode {
285            OpenMode::CreateOnly | OpenMode::LoadOrCreate => {
286                tree.init_if_empty()?;
287                tree.upgrade()?;
288            }
289            OpenMode::LoadOnly => {
290                tree.upgrade()?;
291            }
292            _ => {}
293        }
294
295        let flush_tracker = if options.automatic_flush && options.open_mode != OpenMode::ReadOnly {
296            Some(FlushTracker::new(options.automatic_flush_threshold))
297        } else {
298            None
299        };
300
301        Ok(Self {
302            options,
303            tree,
304            flush_tracker,
305        })
306    }
307
308    /// Open a database in temporary memory.
309    pub fn open_memory(options: Options) -> Result<Self, Error> {
310        Self::open(Box::new(MemoryVfs::default()), options)
311    }
312
313    /// Open a database to a path on the disk.
314    ///
315    /// The path must be a directory.
316    pub fn open_path<P>(root_path: P, options: Options) -> Result<Self, Error>
317    where
318        P: Into<PathBuf>,
319    {
320        Self::open(Box::new(OsVfs::new(root_path)), options)
321    }
322
323    /// Return database metadata information.
324    pub fn metadata(&self) -> Metadata {
325        Metadata {
326            tree_metadata: self.tree.metadata(),
327        }
328    }
329
330    /// Return whether the key exists.
331    pub fn contains_key<K>(&mut self, key: K) -> Result<bool, Error>
332    where
333        K: AsRef<[u8]>,
334    {
335        self.tree.contains_key(key.as_ref())
336    }
337
338    /// Retrieve a stored value, by its key, as a vector.
339    pub fn get<K>(&mut self, key: K) -> Result<Option<Vec<u8>>, Error>
340    where
341        K: AsRef<[u8]>,
342    {
343        let mut value = Vec::new();
344        if self.tree.get(key.as_ref(), &mut value)? {
345            Ok(Some(value))
346        } else {
347            Ok(None)
348        }
349    }
350
351    /// Retrieve a stored value, by its key, into the given buffer.
352    ///
353    /// Returns true if the key-value pair was found. The vector will be
354    /// cleared and resized.
355    pub fn get_buf<K>(&mut self, key: K, value_destination: &mut Vec<u8>) -> Result<bool, Error>
356    where
357        K: AsRef<[u8]>,
358    {
359        self.tree.get(key.as_ref(), value_destination)
360    }
361
362    /// Store a key-value pair.
363    pub fn put<K, V>(&mut self, key: K, value: V) -> Result<(), Error>
364    where
365        K: Into<Vec<u8>>,
366        V: Into<Vec<u8>>,
367    {
368        self.maybe_flush(true)?;
369        self.tree.put(key.into(), value.into())
370    }
371
372    /// Remove a key-value pair by its key.
373    ///
374    /// No error occurs if the key does not exist.
375    pub fn remove<K>(&mut self, key: K) -> Result<(), Error>
376    where
377        K: AsRef<[u8]>,
378    {
379        self.maybe_flush(true)?;
380        self.tree.remove(key.as_ref())
381    }
382
383    /// Return a cursor for iterating all the key-value pairs.
384    pub fn cursor(&mut self) -> Result<Cursor<'_>, Error> {
385        Ok(Cursor::new(&mut self.tree))
386    }
387
388    /// Return a cursor for iterating all the key-value pairs within the given
389    /// range.
390    ///
391    /// This method is equivalent of obtaining a cursor and calling
392    /// [`Cursor::seek()`] and [`Cursor::set_range()`]
393    pub fn cursor_range<K, R>(&mut self, range: R) -> Result<Cursor<'_>, Error>
394    where
395        K: AsRef<[u8]>,
396        R: RangeBounds<K>,
397    {
398        let mut cursor = Cursor::new(&mut self.tree);
399
400        match range.start_bound() {
401            Bound::Included(key) => {
402                cursor.seek(key)?;
403            }
404            Bound::Excluded(key) => {
405                let mut key = key.as_ref().to_vec();
406                key.push(0);
407                cursor.seek(key)?;
408            }
409            Bound::Unbounded => {}
410        }
411
412        cursor.set_range(range);
413
414        Ok(cursor)
415    }
416
417    /// Persist all modifications to the file system.
418    ///
419    /// Calling this function ensures that all changes pending, whether cached
420    /// in memory or in files, are atomically saved on the file system
421    /// before this function returns. If the database is not flushed when
422    /// dropped or the program exits, changes since the last successful flush
423    /// will be discarded. This function effectively emulates a transaction.
424    ///
425    /// For details about automatic flushing, see [`Options`].
426    pub fn flush(&mut self) -> Result<(), Error> {
427        self.tree.flush()
428    }
429
430    /// Check the database for internal consistency and data integrity.
431    ///
432    /// The provided callback function is called with the number of items
433    /// processed and the estimated number of items.
434    ///
435    /// The function returns an error on the first verification failure or
436    /// other error.
437    pub fn verify<P>(&mut self, progress_callback: P) -> Result<(), Error>
438    where
439        P: FnMut(usize, usize),
440    {
441        self.tree.verify_tree(progress_callback)
442    }
443
444    /// Print the tree for debugging purposes.
445    pub fn debug_print_tree(&mut self) -> Result<(), Error> {
446        self.tree.dump_tree()
447    }
448
449    fn maybe_flush(&mut self, increment: bool) -> Result<(), Error> {
450        if let Some(flush_tracker) = &mut self.flush_tracker {
451            if increment {
452                flush_tracker.increment_modification();
453            }
454
455            if flush_tracker.check_should_flush() {
456                self.flush()?;
457            }
458        }
459
460        Ok(())
461    }
462}
463
464impl Drop for Database {
465    fn drop(&mut self) {
466        if self.options.automatic_flush && self.options.open_mode != OpenMode::ReadOnly {
467            let _ = self.flush();
468        }
469    }
470}
471
472impl Debug for Database {
473    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
474        write!(f, "Database {{ open_mode: {:?} }}", self.options.open_mode)
475    }
476}
477
478/// Cursor for navigating key-value pairs in sorted order.
479pub struct Cursor<'a> {
480    tree: &'a mut Tree,
481    tree_cursor: TreeCursor,
482    error: Option<Error>,
483    has_seeked: bool,
484    range: (Bound<Vec<u8>>, Bound<Vec<u8>>),
485}
486
487impl<'a> Cursor<'a> {
488    fn new(tree: &'a mut Tree) -> Self {
489        Self {
490            tree,
491            tree_cursor: TreeCursor::default(),
492            error: None,
493            has_seeked: false,
494            range: (Bound::Unbounded, Bound::Unbounded),
495        }
496    }
497
498    /// Return the most recent error.
499    pub fn error(&self) -> Option<&Error> {
500        self.error.as_ref()
501    }
502
503    /// Reposition the cursor at or after the given key.
504    ///
505    /// In other words, the cursor will be positioned to return key-value pairs
506    /// that are equal or greater than the given key.
507    ///
508    /// If a range has been set and the cursor is positioned outside the range,
509    /// the iteration is considered terminated and no key-value pairs will returned.
510    pub fn seek<K>(&mut self, key: K) -> Result<(), Error>
511    where
512        K: AsRef<[u8]>,
513    {
514        self.has_seeked = true;
515        self.tree.cursor_start(&mut self.tree_cursor, key.as_ref())
516    }
517
518    /// Limit the key-value pairs within a range of keys.
519    ///
520    /// The cursor will return key-value pairs where the keys are contained
521    /// within the given range.
522    ///
523    /// This function will not reposition the cursor to a position within the
524    /// range. You must call [`Self::seek()`] manually since the cursor will not
525    /// automatically seek forward to a range's starting bound.
526    pub fn set_range<K, R>(&mut self, range: R)
527    where
528        K: AsRef<[u8]>,
529        R: RangeBounds<K>,
530    {
531        self.range = concrete_range(range);
532    }
533
534    /// Advance the cursor forward and write the key-value pair to the given buffers.
535    ///
536    /// Returns true if the key-value pair was written.
537    /// Returns false if there are no more key-value pairs
538    /// or the cursor is positioned outside the range if set.
539    ///
540    /// The vectors will be cleared and resized.
541    pub fn next_buf(&mut self, key: &mut Vec<u8>, value: &mut Vec<u8>) -> Result<bool, Error> {
542        if !self.has_seeked {
543            self.has_seeked = true;
544            self.tree.cursor_start(&mut self.tree_cursor, b"")?;
545        }
546
547        if self
548            .tree
549            .cursor_next(&mut self.tree_cursor, key, value, &slice_range(&self.range))?
550        {
551            Ok(true)
552        } else {
553            Ok(false)
554        }
555    }
556}
557
558impl<'a> Iterator for Cursor<'a> {
559    type Item = KeyValuePair;
560
561    fn next(&mut self) -> Option<Self::Item> {
562        let mut key_buffer = Vec::new();
563        let mut value_buffer = Vec::new();
564
565        match self.next_buf(&mut key_buffer, &mut value_buffer) {
566            Ok(success) => {
567                if success {
568                    Some((key_buffer, value_buffer))
569                } else {
570                    None
571                }
572            }
573            Err(error) => {
574                self.error = Some(error);
575                None
576            }
577        }
578    }
579}
580
581impl<'a> Debug for Cursor<'a> {
582    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
583        write!(f, "DatabaseCursor")
584    }
585}
586
587#[derive(Debug)]
588/// Additional non-critical information associated with the database.
589pub struct Metadata<'a> {
590    tree_metadata: Option<&'a TreeMetadata>,
591}
592
593impl<'a> Metadata<'a> {
594    /// Return the approximate number of key-value pairs in the database.
595    pub fn key_value_count(&self) -> u64 {
596        if let Some(meta) = self.tree_metadata {
597            meta.key_value_count
598        } else {
599            0
600        }
601    }
602}
603
604struct FlushTracker {
605    base_threshold: usize,
606    modification_count: usize,
607    last_flush_time: Instant,
608}
609
610impl FlushTracker {
611    pub fn new(base_threshold: usize) -> Self {
612        Self {
613            base_threshold,
614            modification_count: 0,
615            last_flush_time: Instant::now(),
616        }
617    }
618
619    pub fn increment_modification(&mut self) {
620        self.modification_count += 1;
621    }
622
623    pub fn check_should_flush(&mut self) -> bool {
624        let level_long = self.modification_count >= self.base_threshold
625            && self.last_flush_time.elapsed() >= Duration::from_secs(300);
626        let level_short = self.modification_count >= self.base_threshold * 2
627            && self.last_flush_time.elapsed() >= Duration::from_secs(60);
628
629        if level_long || level_short {
630            self.modification_count = 0;
631            self.last_flush_time = Instant::now();
632            true
633        } else {
634            false
635        }
636    }
637}
638
639/// Print the page contents for debugging purposes.
640pub fn debug_print_page(path: &Path) -> Result<(), Error> {
641    let mut format = Format::default();
642    let mut vfs = ReadOnlyVfs::new(Box::new(OsVfs::new(path.parent().unwrap())));
643
644    let filename = path.file_name().unwrap().to_str().unwrap();
645
646    if filename.contains("meta") {
647        let payload: PageMetadata<TreeMetadata> = format.read_file(&mut vfs, filename)?;
648
649        eprintln!("{:?}", payload);
650    } else {
651        let payload: Page<Node> = format.read_file(&mut vfs, filename)?;
652
653        eprintln!("{:?}", payload);
654    }
655
656    Ok(())
657}
658
659fn concrete_range<K, R>(range: R) -> (Bound<Vec<u8>>, Bound<Vec<u8>>)
660where
661    K: AsRef<[u8]>,
662    R: RangeBounds<K>,
663{
664    let start_bound: Bound<Vec<u8>> = match range.start_bound() {
665        Bound::Included(bound) => Bound::Included(bound.as_ref().to_vec()),
666        Bound::Excluded(bound) => Bound::Excluded(bound.as_ref().to_vec()),
667        Bound::Unbounded => Bound::Unbounded,
668    };
669    let end_bound: Bound<Vec<u8>> = match range.end_bound() {
670        Bound::Included(bound) => Bound::Included(bound.as_ref().to_vec()),
671        Bound::Excluded(bound) => Bound::Excluded(bound.as_ref().to_vec()),
672        Bound::Unbounded => Bound::Unbounded,
673    };
674    (start_bound, end_bound)
675}
676
677fn slice_range<'a>(
678    range: &'a (Bound<Vec<u8>>, Bound<Vec<u8>>),
679) -> (Bound<&'a [u8]>, Bound<&'a [u8]>) {
680    let start_bound: Bound<&'a [u8]> = match range.start_bound() {
681        Bound::Included(bound) => Bound::Included(bound),
682        Bound::Excluded(bound) => Bound::Excluded(bound),
683        Bound::Unbounded => Bound::Unbounded,
684    };
685    let end_bound: Bound<&'a [u8]> = match range.end_bound() {
686        Bound::Included(bound) => Bound::Included(bound),
687        Bound::Excluded(bound) => Bound::Excluded(bound),
688        Bound::Unbounded => Bound::Unbounded,
689    };
690    (start_bound, end_bound)
691}