obj/config.rs
//! `Db` open-time configuration.
//!
//! Mirrors the design.md `Config` builder pattern. Currently a thin
//! wrapper around `obj_core::pager::Config` plus the `Db`-level knobs
//! that live outside the pager: `busy_timeout` (M6), `readonly`,
//! `cross_process_lock`, and `skip_open_check`.
6
7use std::time::Duration;
8
9use obj_core::pager::CompressionMode;
10use obj_core::pager::Config as PagerConfig;
11use obj_core::SyncMode;
12
/// Hard ceiling on the pager's LRU cache, counted in 4 KiB frames.
///
/// [`Config::cache_size`] clamps any larger request down to this value
/// instead of returning an error. `1 << 22` = `4_194_304` frames, and
/// `4_194_304` frames × 4 KiB = 16 GiB — far beyond any realistic
/// working set, yet finite, so a bogus byte count such as `usize::MAX`
/// can never ask the pager to pre-size an absurd cache (Power-of-ten
/// Rule 3 — keep allocation bounds explicit).
pub const MAX_CACHE_FRAMES: usize = 1 << 22;
21
/// `Db` open-time configuration. Construct via [`Config::default`]
/// and modify with the builder methods.
///
/// `Debug` is implemented manually so the redaction story for the
/// embedded `pager.encryption_key` stays local to the obj-db crate:
/// the impl delegates to the pager `Config`'s own `Debug`, which
/// already redacts the key, so key material is never printed.
///
/// # Examples
///
/// Chain the setters from [`Config::default`] and hand the result
/// to [`Db::open_with`](crate::Db::open_with):
///
/// ```
/// # fn main() -> obj::Result<()> {
/// use obj::{Config, Db, SyncMode};
/// use std::time::Duration;
///
/// let dir = tempfile::tempdir()?;
///
/// let cfg = Config::default()
///     // Cache size in bytes. Rounded down to whole 4 KiB pages and
///     // clamped into range. Default: 256 KiB (64 frames).
///     .cache_size(64 * 1024 * 1024)
///     // Durability mode used by the WAL on every commit.
///     // Default: SyncMode::Full (survives system-wide power loss).
///     .sync_mode(SyncMode::Full)
///     // Maximum wait when acquiring the writer / reader lock.
///     // Default: 5 seconds. Beyond the budget, the txn returns
///     // `Err(Error::Busy)` rather than blocking indefinitely.
///     .busy_timeout(Duration::from_secs(2))
///     // Skip the open-time catalog walk. Default: false. Production
///     // callers should leave this alone.
///     .skip_open_check(false)
///     // Cross-process file locking. Default: true.
///     .cross_process_lock(true);
///
/// let _db = Db::open_with(dir.path().join("configured.obj"), cfg)?;
/// # Ok(())
/// # }
/// ```
///
/// Quick reference for when to change each knob:
///
/// - [`Config::cache_size`] — bigger cache for read-heavy
///   workloads on large databases; tiny cache on
///   memory-constrained targets.
/// - [`Config::sync_mode`] — [`SyncMode::Normal`] if you accept
///   losing the last few milliseconds of writes on a power loss;
///   [`SyncMode::Off`] only for tests and benchmarks.
/// - [`Config::busy_timeout`] — shorter when the caller prefers a
///   fast `Error::Busy` to a long wait; longer when contention is
///   rare and you would rather block than retry.
/// - [`Config::skip_open_check`] — leave at the default (`false`,
///   i.e. the open-time check stays enabled) in production. The
///   narrow use-cases for `true` are fault-injection harnesses,
///   hot-reload tooling that opens the same file many times per
///   second, and developer workflows that have just run a full
///   `integrity_check`.
/// - [`Config::cross_process_lock`] — leave on (`true`) for any real
///   deployment. The off path is for in-process stress tests where
///   one shared `Db` serves many threads on a single fd.
// `Copy` is intentionally NOT derived: the embedded
// `pager.encryption_key` is key material, and `Copy` would let it be
// duplicated freely. A `Copy` type also cannot implement `Drop`, so
// deriving it would be incompatible with any zeroize-on-drop handling
// of the key (issue #31). `Clone` is kept for the builder chain.
#[derive(Clone)]
// NOTE(review): with the `serde` feature enabled this derive delegates to
// the pager `Config`'s `Serialize`/`Deserialize` impls, which are not
// visible from this file. Confirm they skip or redact `encryption_key`;
// otherwise serializing a `Config` would emit raw key material.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Config {
    /// Pager-level settings; the `cache_size`, `sync_mode`,
    /// `compression` and `encryption_key` builders all write here.
    pub(crate) pager: PagerConfig,
    /// Lock-acquisition budget set by [`Config::busy_timeout`].
    pub(crate) busy_timeout: Duration,
    /// Defaults to `false`. No builder method in this file sets it —
    /// presumably written by a read-only open path elsewhere in the
    /// crate (TODO confirm).
    pub(crate) readonly: bool,
    /// Whether the cross-process file lock layer is enabled; see
    /// [`Config::cross_process_lock`]. Defaults to `true`.
    pub(crate) cross_process_lock: bool,
    /// `true` opts out of the open-time integrity check; see
    /// [`Config::skip_open_check`]. Defaults to `false`.
    pub(crate) skip_open_check: bool,
}
98
99impl std::fmt::Debug for Config {
100 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
101 // The embedded `pager` already has a manual `Debug` that
102 // redacts the encryption key.
103 f.debug_struct("Config")
104 .field("pager", &self.pager)
105 .field("busy_timeout", &self.busy_timeout)
106 .field("readonly", &self.readonly)
107 .field("cross_process_lock", &self.cross_process_lock)
108 .field("skip_open_check", &self.skip_open_check)
109 .finish()
110 }
111}
112
113impl Default for Config {
114 fn default() -> Self {
115 Self {
116 pager: PagerConfig::default(),
117 busy_timeout: obj_core::DEFAULT_BUSY_TIMEOUT,
118 readonly: false,
119 cross_process_lock: true,
120 skip_open_check: false,
121 }
122 }
123}
124
125impl Config {
126 /// Set the pager's LRU cache size, in bytes. Rounded down to
127 /// the nearest 4 KiB page and clamped into the supported range.
128 ///
129 /// A `bytes` value smaller than one page is clamped UP to a
130 /// single frame (the pager requires at least one); a value large
131 /// enough to exceed `MAX_CACHE_FRAMES` is clamped DOWN to that
132 /// ceiling. This keeps the builder infallible so it chains like
133 /// every other setter (issue #47) — there is no out-of-range
134 /// error to surface.
135 #[must_use]
136 pub fn cache_size(self, bytes: usize) -> Self {
137 let frames = (bytes / obj_core::pager::PAGER_PAGE_SIZE).clamp(1, MAX_CACHE_FRAMES);
138 // `frames` is guaranteed `>= 1` by the clamp, so the pager's
139 // only failure mode (`cache_frames == 0`) cannot occur; map
140 // the impossible error to the unclamped pager rather than
141 // panicking (Power-of-ten Rule 7).
142 //
143 // Issue #31: `with_cache_frames` consumes the pager `Config`
144 // by value and returns `Result<Config, _>` without handing the
145 // input back on error, so the previous "fall back to `self`"
146 // arm relied on the pager `Config` being `Copy`. The pager
147 // `Config` is no longer `Copy` on the `encryption` build (its
148 // `encryption_key` zeroizes on drop). Mutate the public
149 // `cache_frames` field in place instead: `frames` is clamped
150 // `>= 1`, so this is exactly the success path of
151 // `with_cache_frames` with no fallible move to recover from.
152 let mut pager = self.pager;
153 pager.cache_frames = frames;
154 Self { pager, ..self }
155 }
156
157 /// Set the durability mode the WAL uses for every commit.
158 #[must_use]
159 pub fn sync_mode(self, mode: SyncMode) -> Self {
160 Self {
161 pager: self.pager.with_sync_mode(mode),
162 ..self
163 }
164 }
165
166 /// Set the cross-process / in-process busy-lock timeout.
167 /// `WriteTxn::begin` and `ReadTxn::begin` return
168 /// `Err(Error::Busy)` if the relevant lock cannot be acquired
169 /// within this budget.
170 #[must_use]
171 pub fn busy_timeout(self, timeout: Duration) -> Self {
172 Self {
173 busy_timeout: timeout,
174 ..self
175 }
176 }
177
178 /// Skip the lightweight open-time integrity check (M11 #91).
179 ///
180 /// By default (`false`), [`crate::Db::open`] / [`crate::Db::open_with`]
181 /// run a fast subset of [`crate::Db::integrity_check`] before
182 /// returning: file-header CRC, catalog root sanity, catalog
183 /// B-tree CRC + invariants, and per-collection pointer-range
184 /// validation. The walk is bounded to the catalog tree only —
185 /// no per-collection deep walk — so the cost is essentially
186 /// independent of the database's total size.
187 ///
188 /// Set to `true` to opt out. The knob exists for narrow use
189 /// cases — fault-injection harnesses that deliberately open a
190 /// corrupted DB to exercise downstream error paths, hot-reload
191 /// tooling that re-opens the same file many times per second,
192 /// or developer workflows that have just run a full
193 /// `Db::integrity_check` and don't want to repeat the catalog
194 /// portion. Production callers SHOULD leave it on.
195 ///
196 /// Skipping the open check does NOT bypass detection: a
197 /// corrupted page surfaces on the first operation that touches
198 /// it. Note also that the obj `Db` constructor performs an
199 /// implicit `Catalog::open_or_init` that reads the catalog
200 /// B-tree's reserved row; a DB whose catalog tree is so
201 /// corrupted that descend fails will still error out of the
202 /// open path even with `skip_open_check(true)`. The knob's
203 /// guarantee is "no EXTRA walk beyond what was already
204 /// required to construct the Db handle."
205 #[must_use]
206 pub fn skip_open_check(self, skip: bool) -> Self {
207 Self {
208 skip_open_check: skip,
209 ..self
210 }
211 }
212
213 /// Phase 3 (issue #8): select per-page compression for new
214 /// files.
215 ///
216 /// `mode = CompressionMode::Lz4` causes [`crate::Db::open_with`]
217 /// to create a brand-new database file at `format_minor = 2`
218 /// (the v1.0 feature-complete minor; the LZ4 layer is signalled
219 /// by a per-page flag bit, not by the minor version) with the
220 /// LZ4 page-compression layer engaged. Pages are
221 /// compressed at the pager layer only — every higher-level
222 /// encoder (B-tree, freelist, catalog, document) still
223 /// operates on the 4092-byte logical body. Compression is
224 /// fully transparent to user code.
225 ///
226 /// **No-op against existing files.** When the database file
227 /// already exists, its on-disk header dictates whether
228 /// compression is in use; this knob is consulted only on
229 /// file creation. Opening an existing `format_minor = 0`
230 /// (uncompressed) file with
231 /// `Config::compression(CompressionMode::Lz4)` does NOT
232 /// upgrade the file; reads and writes continue to use the
233 /// uncompressed layout. Migrating an existing database to
234 /// compression is deferred to a future tool.
235 ///
236 /// **Build-time requirement.** Compression requires the
237 /// `compression` Cargo feature on the `obj-db` crate (which
238 /// in turn enables `obj-core/compression`). A build WITHOUT
239 /// the feature that calls `Config::compression(Lz4)` will
240 /// return `Error::FormatFeatureUnsupported { feature:
241 /// "compression" }` from `Db::open_with` at the moment a
242 /// new file would otherwise be created.
243 #[must_use]
244 pub fn compression(self, mode: CompressionMode) -> Self {
245 Self {
246 pager: self.pager.with_compression_mode(mode),
247 ..self
248 }
249 }
250
251 /// Phase 4 (issue #9): supply a 32-byte master encryption key.
252 ///
253 /// When set on a **new** database file, the file is created at
254 /// `format_minor = 2` with `feature_flags` bit 1 set and a fresh
255 /// CSPRNG-generated `kdf_salt` stored plaintext in the page-0
256 /// header. Every non-header page is encrypted with
257 /// XChaCha20-Poly1305: 4096-byte logical page → 4136-byte
258 /// physical page (24-byte nonce + 16-byte tag).
259 ///
260 /// When set on an **existing** database file:
261 /// - If the file is `format_minor = 2` (encryption-capable):
262 /// the key is used to derive the per-file page key via
263 /// `HKDF-SHA256(key, kdf_salt, b"obj-page-encryption-v1")`.
264 /// A wrong key surfaces as
265 /// [`Error::EncryptionKeyInvalid`](obj_core::Error::EncryptionKeyInvalid)
266 /// on the first encrypted page read.
267 /// - If the file is `format_minor < 2`: open returns
268 /// [`Error::EncryptionKeyMismatch`](obj_core::Error::EncryptionKeyMismatch).
269 ///
270 /// **Build-time requirement.** Encryption requires the
271 /// `encryption` Cargo feature on the `obj-db` crate (which
272 /// propagates to `obj-core/encryption`). A build WITHOUT the
273 /// feature that sets a key returns
274 /// [`Error::FormatFeatureUnsupported`](obj_core::Error::FormatFeatureUnsupported)
275 /// from `Db::open_with` (`feature = "encryption"`).
276 ///
277 /// The key is held in memory inside the obj-core pager
278 /// [`Config`](obj_core::pager::Config); the `Debug` impl
279 /// redacts it (`encryption_key: "<set>"`). The key is NOT
280 /// persisted to disk. Callers are responsible for the master
281 /// key's lifecycle (storage, rotation, derivation from a
282 /// passphrase via Argon2/scrypt, etc. — those are out of
283 /// scope for this issue).
284 ///
285 /// Zeroising the key on drop (via the `zeroize` crate) is a
286 /// known follow-up; it is not implemented here.
287 #[must_use]
288 pub fn encryption_key(self, key: [u8; 32]) -> Self {
289 Self {
290 pager: self.pager.with_encryption_key(Some(key)),
291 ..self
292 }
293 }
294
295 /// Enable / disable the cross-process file lock layer.
296 ///
297 /// When `false`, the [`Db`](crate::Db) opened with this config
298 /// does NOT acquire OS-level byte-range locks on the database
299 /// file. Used by the M6 #49 concurrent stress test where every
300 /// thread shares one `Db` (and therefore one file descriptor):
301 /// POSIX OFD locks are per-fd, so multiple threads on the same
302 /// fd cannot use the lock to enforce inter-thread exclusion —
303 /// that's what the in-process write-serialization mutex is for.
304 ///
305 /// Default: `true` (cross-process locking enabled).
306 #[must_use]
307 pub fn cross_process_lock(self, enabled: bool) -> Self {
308 Self {
309 cross_process_lock: enabled,
310 ..self
311 }
312 }
313}