Skip to main content

obj/
config.rs

//! `Db` open-time configuration.
//!
//! Mirrors the [`design.md`](https://github.com/uname-n/obj/blob/master/design.md)
//! `Config` builder pattern.  A thin wrapper around
//! `obj_core::pager::Config` plus the obj-db-level settings: the
//! M6-specific `busy_timeout` knob, `readonly`,
//! `cross_process_lock`, and `skip_open_check`.
7
8use std::time::Duration;
9
10use obj_core::pager::CompressionMode;
11use obj_core::pager::Config as PagerConfig;
12use obj_core::SyncMode;
13
/// Ceiling on the pager's LRU cache, counted in 4 KiB frames.
///
/// [`Config::cache_size`] never rejects an oversized request; it
/// clamps the frame count down to this value instead. `1 << 22` is
/// `4_194_304` frames, and `4_194_304` frames × 4 KiB = 16 GiB —
/// well beyond any realistic working set, yet still finite, so a
/// bogus `usize::MAX` byte count cannot ask the pager to pre-size an
/// absurd cache (Power-of-ten Rule 3 — keep allocation bounds
/// explicit).
pub const MAX_CACHE_FRAMES: usize = 1 << 22;
22
/// `Db` open-time configuration.  Construct via [`Config::default`]
/// and modify with the builder methods.
///
/// `Debug` is implemented manually so the embedded
/// `pager.encryption_key` field never leaks key material — the
/// pager `Config`'s own `Debug` impl already redacts it, but
/// implementing `Debug` manually here keeps the redaction story
/// local for the obj-db crate as well.
///
/// # Examples
///
/// Chain the setters from [`Config::default`] and hand the result
/// to [`Db::open_with`](crate::Db::open_with):
///
/// ```
/// # fn main() -> obj::Result<()> {
/// use obj::{Config, Db, SyncMode};
/// use std::time::Duration;
///
/// let dir = tempfile::tempdir()?;
///
/// let cfg = Config::default()
///     // Cache size in bytes. Rounded down to whole 4 KiB pages and
///     // clamped into range. Default: 256 KiB (64 frames).
///     .cache_size(64 * 1024 * 1024)
///     // Durability mode used by the WAL on every commit.
///     // Default: SyncMode::Full (survives system-wide power loss).
///     .sync_mode(SyncMode::Full)
///     // Maximum wait when acquiring the writer / reader lock.
///     // Default: 5 seconds. Beyond the budget, the txn returns
///     // `Err(Error::Busy)` rather than blocking indefinitely.
///     .busy_timeout(Duration::from_secs(2))
///     // Skip the open-time catalog walk. Default: false. Production
///     // callers should leave this alone.
///     .skip_open_check(false)
///     // Cross-process file locking. Default: true.
///     .cross_process_lock(true);
///
/// let _db = Db::open_with(dir.path().join("configured.obj"), cfg)?;
/// # Ok(())
/// # }
/// ```
///
/// Quick reference for when to change each knob:
///
/// - [`Config::cache_size`] — bigger cache for read-heavy
///   workloads on large databases; tiny cache on
///   memory-constrained targets.
/// - [`Config::sync_mode`] — [`SyncMode::Normal`] if you accept
///   losing the last few milliseconds of writes on a power loss;
///   [`SyncMode::Off`] only for tests and benchmarks.
/// - [`Config::busy_timeout`] — shorter when the caller prefers a
///   fast `Error::Busy` to a long wait; longer when contention is
///   rare and you would rather block than retry.
/// - [`Config::skip_open_check`] — leave at `false` (open check
///   enabled) in production. The narrow use-cases for `true` are
///   fault-injection harnesses, hot-reload tooling that opens the
///   same file many times per second, and developer workflows that
///   have just run a full `integrity_check`.
/// - [`Config::cross_process_lock`] — leave at `true` (locking
///   enabled) for any real deployment. The `false` path is for
///   in-process stress tests where one shared `Db` serves many
///   threads on a single fd.
// `Copy` is intentionally NOT derived: the embedded
// `pager.encryption_key` is key material, and `Copy` would let it be
// duplicated freely and would permanently preclude a future
// `Zeroize`-on-`Drop` impl (issue #31). `Clone` is kept for the
// builder chain.
//
// NOTE(review): with the `serde` feature enabled, the derive below
// serializes `pager` through the pager `Config`'s own serde impl.
// The manual `Debug` redaction does not cover that path — confirm
// the pager impl skips or redacts `encryption_key`.
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Config {
    /// Pager-level settings. The builder methods store the cache
    /// frame count, sync mode, compression mode, and optional
    /// encryption key in here.
    pub(crate) pager: PagerConfig,
    /// Lock-acquisition budget set by [`Config::busy_timeout`];
    /// defaults to `obj_core::DEFAULT_BUSY_TIMEOUT`.
    pub(crate) busy_timeout: Duration,
    /// Read-only flag; defaults to `false`.
    // No setter for this field is visible in this part of the file;
    // presumably a read-only open path elsewhere in the crate sets
    // it — TODO confirm.
    pub(crate) readonly: bool,
    /// Whether the cross-process file lock layer is enabled; see
    /// [`Config::cross_process_lock`]. Defaults to `true`.
    pub(crate) cross_process_lock: bool,
    /// Whether to skip the open-time integrity check; see
    /// [`Config::skip_open_check`]. Defaults to `false`.
    pub(crate) skip_open_check: bool,
}
99
100impl std::fmt::Debug for Config {
101    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102        // The embedded `pager` already has a manual `Debug` that
103        // redacts the encryption key.
104        f.debug_struct("Config")
105            .field("pager", &self.pager)
106            .field("busy_timeout", &self.busy_timeout)
107            .field("readonly", &self.readonly)
108            .field("cross_process_lock", &self.cross_process_lock)
109            .field("skip_open_check", &self.skip_open_check)
110            .finish()
111    }
112}
113
114impl Default for Config {
115    fn default() -> Self {
116        Self {
117            pager: PagerConfig::default(),
118            busy_timeout: obj_core::DEFAULT_BUSY_TIMEOUT,
119            readonly: false,
120            cross_process_lock: true,
121            skip_open_check: false,
122        }
123    }
124}
125
126impl Config {
127    /// Set the pager's LRU cache size, in bytes.  Rounded down to
128    /// the nearest 4 KiB page and clamped into the supported range.
129    ///
130    /// A `bytes` value smaller than one page is clamped UP to a
131    /// single frame (the pager requires at least one); a value large
132    /// enough to exceed `MAX_CACHE_FRAMES` is clamped DOWN to that
133    /// ceiling. This keeps the builder infallible so it chains like
134    /// every other setter (issue #47) — there is no out-of-range
135    /// error to surface.
136    #[must_use]
137    pub fn cache_size(self, bytes: usize) -> Self {
138        let frames = (bytes / obj_core::pager::PAGER_PAGE_SIZE).clamp(1, MAX_CACHE_FRAMES);
139        // `frames` is guaranteed `>= 1` by the clamp, so the pager's
140        // only failure mode (`cache_frames == 0`) cannot occur; map
141        // the impossible error to the unclamped pager rather than
142        // panicking (Power-of-ten Rule 7).
143        //
144        // Issue #31: `with_cache_frames` consumes the pager `Config`
145        // by value and returns `Result<Config, _>` without handing the
146        // input back on error, so the previous "fall back to `self`"
147        // arm relied on the pager `Config` being `Copy`. The pager
148        // `Config` is no longer `Copy` on the `encryption` build (its
149        // `encryption_key` zeroizes on drop). Mutate the public
150        // `cache_frames` field in place instead: `frames` is clamped
151        // `>= 1`, so this is exactly the success path of
152        // `with_cache_frames` with no fallible move to recover from.
153        let mut pager = self.pager;
154        pager.cache_frames = frames;
155        Self { pager, ..self }
156    }
157
158    /// Set the durability mode the WAL uses for every commit.
159    #[must_use]
160    pub fn sync_mode(self, mode: SyncMode) -> Self {
161        Self {
162            pager: self.pager.with_sync_mode(mode),
163            ..self
164        }
165    }
166
167    /// Set the cross-process / in-process busy-lock timeout.
168    /// `WriteTxn::begin` and `ReadTxn::begin` return
169    /// `Err(Error::Busy)` if the relevant lock cannot be acquired
170    /// within this budget.
171    #[must_use]
172    pub fn busy_timeout(self, timeout: Duration) -> Self {
173        Self {
174            busy_timeout: timeout,
175            ..self
176        }
177    }
178
179    /// Skip the lightweight open-time integrity check (M11 #91).
180    ///
181    /// By default (`false`), [`crate::Db::open`] / [`crate::Db::open_with`]
182    /// run a fast subset of [`crate::Db::integrity_check`] before
183    /// returning: file-header CRC, catalog root sanity, catalog
184    /// B-tree CRC + invariants, and per-collection pointer-range
185    /// validation. The walk is bounded to the catalog tree only —
186    /// no per-collection deep walk — so the cost is essentially
187    /// independent of the database's total size.
188    ///
189    /// Set to `true` to opt out. The knob exists for narrow use
190    /// cases — fault-injection harnesses that deliberately open a
191    /// corrupted DB to exercise downstream error paths, hot-reload
192    /// tooling that re-opens the same file many times per second,
193    /// or developer workflows that have just run a full
194    /// `Db::integrity_check` and don't want to repeat the catalog
195    /// portion. Production callers SHOULD leave it on.
196    ///
197    /// Skipping the open check does NOT bypass detection: a
198    /// corrupted page surfaces on the first operation that touches
199    /// it. Note also that the obj `Db` constructor performs an
200    /// implicit `Catalog::open_or_init` that reads the catalog
201    /// B-tree's reserved row; a DB whose catalog tree is so
202    /// corrupted that descend fails will still error out of the
203    /// open path even with `skip_open_check(true)`. The knob's
204    /// guarantee is "no EXTRA walk beyond what was already
205    /// required to construct the Db handle."
206    #[must_use]
207    pub fn skip_open_check(self, skip: bool) -> Self {
208        Self {
209            skip_open_check: skip,
210            ..self
211        }
212    }
213
214    /// Phase 3 (issue #8): select per-page compression for new
215    /// files.
216    ///
217    /// `mode = CompressionMode::Lz4` causes [`crate::Db::open_with`]
218    /// to create a brand-new database file at `format_minor = 2`
219    /// (the v1.0 feature-complete minor; the LZ4 layer is signalled
220    /// by a per-page flag bit, not by the minor version) with the
221    /// LZ4 page-compression layer engaged. Pages are
222    /// compressed at the pager layer only — every higher-level
223    /// encoder (B-tree, freelist, catalog, document) still
224    /// operates on the 4092-byte logical body. Compression is
225    /// fully transparent to user code.
226    ///
227    /// **No-op against existing files.** When the database file
228    /// already exists, its on-disk header dictates whether
229    /// compression is in use; this knob is consulted only on
230    /// file creation. Opening an existing `format_minor = 0`
231    /// (uncompressed) file with
232    /// `Config::compression(CompressionMode::Lz4)` does NOT
233    /// upgrade the file; reads and writes continue to use the
234    /// uncompressed layout. Migrating an existing database to
235    /// compression is deferred to a future tool.
236    ///
237    /// **Build-time requirement.** Compression requires the
238    /// `compression` Cargo feature on the `obj-db` crate (which
239    /// in turn enables `obj-core/compression`). A build WITHOUT
240    /// the feature that calls `Config::compression(Lz4)` will
241    /// return `Error::FormatFeatureUnsupported { feature:
242    /// "compression" }` from `Db::open_with` at the moment a
243    /// new file would otherwise be created.
244    #[must_use]
245    pub fn compression(self, mode: CompressionMode) -> Self {
246        Self {
247            pager: self.pager.with_compression_mode(mode),
248            ..self
249        }
250    }
251
252    /// Phase 4 (issue #9): supply a 32-byte master encryption key.
253    ///
254    /// When set on a **new** database file, the file is created at
255    /// `format_minor = 2` with `feature_flags` bit 1 set and a fresh
256    /// CSPRNG-generated `kdf_salt` stored plaintext in the page-0
257    /// header. Every non-header page is encrypted with
258    /// XChaCha20-Poly1305: 4096-byte logical page → 4136-byte
259    /// physical page (24-byte nonce + 16-byte tag).
260    ///
261    /// When set on an **existing** database file:
262    /// - If the file is `format_minor = 2` (encryption-capable):
263    ///   the key is used to derive the per-file page key via
264    ///   `HKDF-SHA256(key, kdf_salt, b"obj-page-encryption-v1")`.
265    ///   A wrong key surfaces as
266    ///   [`Error::EncryptionKeyInvalid`](obj_core::Error::EncryptionKeyInvalid)
267    ///   on the first encrypted page read.
268    /// - If the file is `format_minor < 2`: open returns
269    ///   [`Error::EncryptionKeyMismatch`](obj_core::Error::EncryptionKeyMismatch).
270    ///
271    /// **Build-time requirement.** Encryption requires the
272    /// `encryption` Cargo feature on the `obj-db` crate (which
273    /// propagates to `obj-core/encryption`). A build WITHOUT the
274    /// feature that sets a key returns
275    /// [`Error::FormatFeatureUnsupported`](obj_core::Error::FormatFeatureUnsupported)
276    /// from `Db::open_with` (`feature = "encryption"`).
277    ///
278    /// The key is held in memory inside the obj-core pager
279    /// [`Config`](obj_core::pager::Config); the `Debug` impl
280    /// redacts it (`encryption_key: "<set>"`). The key is NOT
281    /// persisted to disk. Callers are responsible for the master
282    /// key's lifecycle (storage, rotation, derivation from a
283    /// passphrase via Argon2/scrypt, etc. — those are out of
284    /// scope for this issue).
285    ///
286    /// Zeroising the key on drop (via the `zeroize` crate) is a
287    /// known follow-up; it is not implemented here.
288    #[must_use]
289    pub fn encryption_key(self, key: [u8; 32]) -> Self {
290        Self {
291            pager: self.pager.with_encryption_key(Some(key)),
292            ..self
293        }
294    }
295
296    /// Enable / disable the cross-process file lock layer.
297    ///
298    /// When `false`, the [`Db`](crate::Db) opened with this config
299    /// does NOT acquire OS-level byte-range locks on the database
300    /// file.  Used by the M6 #49 concurrent stress test where every
301    /// thread shares one `Db` (and therefore one file descriptor):
302    /// POSIX OFD locks are per-fd, so multiple threads on the same
303    /// fd cannot use the lock to enforce inter-thread exclusion —
304    /// that's what the in-process write-serialization mutex is for.
305    ///
306    /// Default: `true` (cross-process locking enabled).
307    #[must_use]
308    pub fn cross_process_lock(self, enabled: bool) -> Self {
309        Self {
310            cross_process_lock: enabled,
311            ..self
312        }
313    }
314}