1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
// SIMD optimizations (nightly-only)
// Allow retpoline cfg values from multiversion crate's target feature detection
// Pedantic allows for systems code
// Systems code often truncates intentionally
// f64 calculations are approximate by design
// Intentional in hashing/indexing
// Prefer explicit `as` for clarity in hot paths
// Intentional in size calculations
// Internal errors are self-explanatory
// Panics indicate bugs, not expected behavior
// Common pattern for scoped helpers
// Complex DB operations require complex functions
// Builder pattern methods don't need must_use
// Future-proofing for method signatures
// &Option<T> is sometimes clearer than Option<&T>
// Used intentionally in modules
// iter() returns cursor-like types
// clone_from() isn't always better for small types
// .iter() is more explicit than &
// Config structs need many bools
// Builder patterns
// Sometimes explicit lifetimes are clearer
// Prefixing fields is sometimes clearer
// let-else isn't always clearer
// Explicit match arms can be clearer
// Debug impls don't need all fields
// Type::default() is sometimes clearer
// Matching on () is valid
// Wrapping in Option/Result for API consistency
//! seerdb - Research-grade embedded storage engine
//!
//! A modern LSM-tree based key-value storage engine implementing 2018-2024 research
//! on learned data structures, workload-aware optimization, and efficient key-value separation.
//!
//! # Features
//!
//! - **LSM-tree architecture**: Write-optimized with efficient compaction
//! - **Durability**: Write-ahead logging with configurable sync policies
//! - **Concurrency**: Lock-free reads with concurrent writes
//! - **Observability**: Built-in metrics, health checks, and structured logging
//! - **Key-Value Separation**: WiscKey-style vLog for large values (reduces write amplification)
//! - **Background Compaction**: Non-blocking async compaction for better write throughput
//!
//! # Quick Start
//!
//! ```rust,no_run
//! use seerdb::DB;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // Open database with default options
//! let db = DB::open("./my_database")?;
//!
//! // Write data
//! db.put(b"hello", b"world")?;
//!
//! // Read data
//! let value = db.get(b"hello")?;
//! assert_eq!(value, Some(bytes::Bytes::from("world")));
//!
//! // Delete data
//! db.delete(b"hello")?;
//! # Ok(())
//! # }
//! ```
//!
//! # Configuration
//!
//! The defaults work well for most cases. Customize only what you need:
//!
//! ```rust,no_run
//! use seerdb::{DBOptions, SyncPolicy};
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // Customize specific options
//! let db = DBOptions::default()
//!     .memtable_capacity(512 * 1024 * 1024) // 512 MiB write buffer
//!     .open("./my_database")?;
//!
//! // Or use a preset profile
//! let db = DBOptions::high_throughput()
//!     .open("./my_database")?;
//! # Ok(())
//! # }
//! ```
//!
//! See [`DBOptions`] for all configuration options and profiles.
//!
//! # Architecture
//!
//! seerdb uses an LSM-tree architecture with the following components:
//!
//! - **Memtable**: In-memory buffer using concurrent skiplist
//! - **WAL**: Write-ahead log for durability
//! - **`SSTable`**: Sorted string tables on disk with bloom filters
//! - **LSM Levels**: 7 levels with exponential sizing (10x ratio)
//! - **`VLog`**: Optional value log for key-value separation (large values)
//! - **Compaction**: Background merge of `SSTables` to reduce read amplification
//!
//! # Performance Characteristics
//!
//! - **Writes**: O(log n) in-memory + O(1) WAL append
//! - **Reads**: O(log n) skiplist + O(levels) `SSTable` lookups with bloom filter optimization
//! - **Scans**: Efficient via merge iteration over memtable + `SSTables`
//! - **Space Amplification**: ~2x (typical LSM-tree)
//! - **Write Amplification**: 10-30x (reduced with vLog for large values)
//!
//! # Durability Guarantees
//!
//! seerdb provides configurable durability via [`SyncPolicy`]:
//!
//! | Policy | Survives | Typical latency |
//! |--------|----------|-------------|
//! | `SyncAll` | Power loss | ~4 ms |
//! | `SyncData` | Power loss | ~5 µs Linux, ~4 ms macOS |
//! | `Barrier` | App crash | ~5 µs Linux, ~0.3 ms macOS |
//! | `None` | Nothing | ~4 µs |
//!
//! **macOS note**: `SyncData` is slow on macOS due to APFS. Use `Barrier` for
//! high-throughput writes when power-loss durability isn't required.
//! See [`SyncPolicy`] for details.
//!
//! # Observability
//!
//! Built-in metrics and health checks for production deployment:
//!
//! ```rust,ignore
//! # use seerdb::{DB, DBOptions};
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! # let db = DB::open("./my_database")?;
//! // Get current database statistics
//! let stats = db.stats();
//! println!("Operations: {} reads, {} writes", stats.total_reads, stats.total_writes);
//!
//! // Check database health
//! let health = db.health();
//! println!("Health: {:?}", health);
//! # Ok(())
//! # }
//! ```
// Use jemalloc as the global allocator for better multi-threaded performance.
// Tested jemalloc vs mimalloc: jemalloc wins 3/4 workloads (+17-21% improvement).
// Uses disable_initial_exec_tls to work with Python extensions on Linux (glibc TLS limitation).
// Disabled when using dhat profiler (conflicts with #[global_allocator]).
//
// NOTE(review): no `#[global_allocator]` attribute or `cfg` gate is visible on this
// static in this view; without `#[global_allocator]` the declaration does not change
// the process allocator. Confirm both attributes are present in the full file.
static GLOBAL: Jemalloc = Jemalloc;
// Failpoint support for deterministic crash testing (must be first for macro availability)
// Internal modules (not re-exported, but accessible for tests)
// Public modules (user-facing API)
// Re-export public API types
// Core database types
pub use StorageConfig;
pub use ;
// Configuration
pub use CompressionType;
pub use ;
// Operations
pub use Batch;
pub use ;
pub use Snapshot;
pub use ;
// Merge operators (user-extensible)
pub use ;
// Observability
pub use ;
pub use DBStats;
// Bulk operations
pub use ;