obj/config.rs
1//! `Db` open-time configuration.
2//!
3//! Mirrors the [`design.md`](https://github.com/uname-n/obj/blob/master/design.md)
4//! `Config` builder pattern. Currently a thin
5//! wrapper around `obj_core::pager::Config` plus the M6-specific
6//! `busy_timeout` knob.
7
8use std::time::Duration;
9
10use obj_core::pager::CompressionMode;
11use obj_core::pager::Config as PagerConfig;
12use obj_core::SyncMode;
13
/// Hard ceiling on the pager's LRU cache, counted in 4 KiB frames.
///
/// [`Config::cache_size`] never errors on an oversized request; it
/// clamps the frame count down to this value instead. `1 << 22` is
/// `4_194_304` frames, and `4_194_304` frames × 4 KiB = 16 GiB — far
/// above any realistic working set, but bounded so a bogus
/// `usize::MAX` byte count cannot ask the pager to pre-size an
/// absurd cache (Power-of-ten Rule 3 — keep allocation bounds
/// explicit).
pub const MAX_CACHE_FRAMES: usize = 1 << 22;
22
23/// `Db` open-time configuration. Construct via [`Config::default`]
24/// and modify with the builder methods.
25///
26/// `Debug` is implemented manually so the embedded
27/// `pager.encryption_key` field never leaks key material — the
28/// derived `Debug` on the pager's `Config` already redacts it, but
29/// implementing it manually here keeps the redaction story local
30/// for the obj-db crate as well.
31///
32/// # Examples
33///
34/// Chain the setters from [`Config::default`] and hand the result
35/// to [`Db::open_with`](crate::Db::open_with):
36///
37/// ```
38/// # fn main() -> obj::Result<()> {
39/// use obj::{Config, Db, SyncMode};
40/// use std::time::Duration;
41///
42/// let dir = tempfile::tempdir()?;
43///
44/// let cfg = Config::default()
45/// // Cache size in bytes. Rounded down to whole 4 KiB pages and
46/// // clamped into range. Default: 256 KiB (64 frames).
47/// .cache_size(64 * 1024 * 1024)
48/// // Durability mode used by the WAL on every commit.
49/// // Default: SyncMode::Full (survives system-wide power loss).
50/// .sync_mode(SyncMode::Full)
51/// // Maximum wait when acquiring the writer / reader lock.
52/// // Default: 5 seconds. Beyond the budget, the txn returns
53/// // `Err(Error::Busy)` rather than blocking indefinitely.
54/// .busy_timeout(Duration::from_secs(2))
55/// // Skip the open-time catalog walk. Default: false. Production
56/// // callers should leave this alone.
57/// .skip_open_check(false)
58/// // Cross-process file locking. Default: true.
59/// .cross_process_lock(true);
60///
61/// let _db = Db::open_with(dir.path().join("configured.obj"), cfg)?;
62/// # Ok(())
63/// # }
64/// ```
65///
66/// Quick reference for when to change each knob:
67///
68/// - [`Config::cache_size`] — bigger cache for read-heavy
69/// workloads on large databases; tiny cache on
70/// memory-constrained targets.
71/// - [`Config::sync_mode`] — [`SyncMode::Normal`] if you accept
72/// losing the last few milliseconds of writes on a power loss;
73/// [`SyncMode::Off`] only for tests and benchmarks.
74/// - [`Config::busy_timeout`] — shorter when the caller prefers a
75/// fast `Error::Busy` to a long wait; longer when contention is
76/// rare and you would rather block than retry.
77/// - [`Config::skip_open_check`] — leave on in production. The
78/// narrow use-cases are fault-injection harnesses, hot-reload
79/// tooling that opens the same file many times per second, and
80/// developer workflows that have just run a full
81/// `integrity_check`.
82/// - [`Config::cross_process_lock`] — leave on for any real
83/// deployment. The off path is for in-process stress tests where
84/// one shared `Db` serves many threads on a single fd.
85// `Copy` is intentionally NOT derived: the embedded
86// `pager.encryption_key` is key material, and `Copy` would let it be
87// duplicated freely and would permanently preclude a future
88// `Zeroize`-on-`Drop` impl (issue #31). `Clone` is kept for the
89// builder chain.
90#[derive(Clone)]
91#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
92pub struct Config {
93 pub(crate) pager: PagerConfig,
94 pub(crate) busy_timeout: Duration,
95 pub(crate) readonly: bool,
96 pub(crate) cross_process_lock: bool,
97 pub(crate) skip_open_check: bool,
98}
99
100impl std::fmt::Debug for Config {
101 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102 // The embedded `pager` already has a manual `Debug` that
103 // redacts the encryption key.
104 f.debug_struct("Config")
105 .field("pager", &self.pager)
106 .field("busy_timeout", &self.busy_timeout)
107 .field("readonly", &self.readonly)
108 .field("cross_process_lock", &self.cross_process_lock)
109 .field("skip_open_check", &self.skip_open_check)
110 .finish()
111 }
112}
113
114impl Default for Config {
115 fn default() -> Self {
116 Self {
117 pager: PagerConfig::default(),
118 busy_timeout: obj_core::DEFAULT_BUSY_TIMEOUT,
119 readonly: false,
120 cross_process_lock: true,
121 skip_open_check: false,
122 }
123 }
124}
125
126impl Config {
127 /// Set the pager's LRU cache size, in bytes. Rounded down to
128 /// the nearest 4 KiB page and clamped into the supported range.
129 ///
130 /// A `bytes` value smaller than one page is clamped UP to a
131 /// single frame (the pager requires at least one); a value large
132 /// enough to exceed `MAX_CACHE_FRAMES` is clamped DOWN to that
133 /// ceiling. This keeps the builder infallible so it chains like
134 /// every other setter (issue #47) — there is no out-of-range
135 /// error to surface.
136 #[must_use]
137 pub fn cache_size(self, bytes: usize) -> Self {
138 let frames = (bytes / obj_core::pager::PAGER_PAGE_SIZE).clamp(1, MAX_CACHE_FRAMES);
139 // `frames` is guaranteed `>= 1` by the clamp, so the pager's
140 // only failure mode (`cache_frames == 0`) cannot occur; map
141 // the impossible error to the unclamped pager rather than
142 // panicking (Power-of-ten Rule 7).
143 //
144 // Issue #31: `with_cache_frames` consumes the pager `Config`
145 // by value and returns `Result<Config, _>` without handing the
146 // input back on error, so the previous "fall back to `self`"
147 // arm relied on the pager `Config` being `Copy`. The pager
148 // `Config` is no longer `Copy` on the `encryption` build (its
149 // `encryption_key` zeroizes on drop). Mutate the public
150 // `cache_frames` field in place instead: `frames` is clamped
151 // `>= 1`, so this is exactly the success path of
152 // `with_cache_frames` with no fallible move to recover from.
153 let mut pager = self.pager;
154 pager.cache_frames = frames;
155 Self { pager, ..self }
156 }
157
158 /// Set the durability mode the WAL uses for every commit.
159 #[must_use]
160 pub fn sync_mode(self, mode: SyncMode) -> Self {
161 Self {
162 pager: self.pager.with_sync_mode(mode),
163 ..self
164 }
165 }
166
167 /// Set the cross-process / in-process busy-lock timeout.
168 /// `WriteTxn::begin` and `ReadTxn::begin` return
169 /// `Err(Error::Busy)` if the relevant lock cannot be acquired
170 /// within this budget.
171 #[must_use]
172 pub fn busy_timeout(self, timeout: Duration) -> Self {
173 Self {
174 busy_timeout: timeout,
175 ..self
176 }
177 }
178
179 /// Skip the lightweight open-time integrity check (M11 #91).
180 ///
181 /// By default (`false`), [`crate::Db::open`] / [`crate::Db::open_with`]
182 /// run a fast subset of [`crate::Db::integrity_check`] before
183 /// returning: file-header CRC, catalog root sanity, catalog
184 /// B-tree CRC + invariants, and per-collection pointer-range
185 /// validation. The walk is bounded to the catalog tree only —
186 /// no per-collection deep walk — so the cost is essentially
187 /// independent of the database's total size.
188 ///
189 /// Set to `true` to opt out. The knob exists for narrow use
190 /// cases — fault-injection harnesses that deliberately open a
191 /// corrupted DB to exercise downstream error paths, hot-reload
192 /// tooling that re-opens the same file many times per second,
193 /// or developer workflows that have just run a full
194 /// `Db::integrity_check` and don't want to repeat the catalog
195 /// portion. Production callers SHOULD leave it on.
196 ///
197 /// Skipping the open check does NOT bypass detection: a
198 /// corrupted page surfaces on the first operation that touches
199 /// it. Note also that the obj `Db` constructor performs an
200 /// implicit `Catalog::open_or_init` that reads the catalog
201 /// B-tree's reserved row; a DB whose catalog tree is so
202 /// corrupted that descend fails will still error out of the
203 /// open path even with `skip_open_check(true)`. The knob's
204 /// guarantee is "no EXTRA walk beyond what was already
205 /// required to construct the Db handle."
206 #[must_use]
207 pub fn skip_open_check(self, skip: bool) -> Self {
208 Self {
209 skip_open_check: skip,
210 ..self
211 }
212 }
213
214 /// Phase 3 (issue #8): select per-page compression for new
215 /// files.
216 ///
217 /// `mode = CompressionMode::Lz4` causes [`crate::Db::open_with`]
218 /// to create a brand-new database file at `format_minor = 2`
219 /// (the v1.0 feature-complete minor; the LZ4 layer is signalled
220 /// by a per-page flag bit, not by the minor version) with the
221 /// LZ4 page-compression layer engaged. Pages are
222 /// compressed at the pager layer only — every higher-level
223 /// encoder (B-tree, freelist, catalog, document) still
224 /// operates on the 4092-byte logical body. Compression is
225 /// fully transparent to user code.
226 ///
227 /// **No-op against existing files.** When the database file
228 /// already exists, its on-disk header dictates whether
229 /// compression is in use; this knob is consulted only on
230 /// file creation. Opening an existing `format_minor = 0`
231 /// (uncompressed) file with
232 /// `Config::compression(CompressionMode::Lz4)` does NOT
233 /// upgrade the file; reads and writes continue to use the
234 /// uncompressed layout. Migrating an existing database to
235 /// compression is deferred to a future tool.
236 ///
237 /// **Build-time requirement.** Compression requires the
238 /// `compression` Cargo feature on the `obj-db` crate (which
239 /// in turn enables `obj-core/compression`). A build WITHOUT
240 /// the feature that calls `Config::compression(Lz4)` will
241 /// return `Error::FormatFeatureUnsupported { feature:
242 /// "compression" }` from `Db::open_with` at the moment a
243 /// new file would otherwise be created.
244 #[must_use]
245 pub fn compression(self, mode: CompressionMode) -> Self {
246 Self {
247 pager: self.pager.with_compression_mode(mode),
248 ..self
249 }
250 }
251
252 /// Phase 4 (issue #9): supply a 32-byte master encryption key.
253 ///
254 /// When set on a **new** database file, the file is created at
255 /// `format_minor = 2` with `feature_flags` bit 1 set and a fresh
256 /// CSPRNG-generated `kdf_salt` stored plaintext in the page-0
257 /// header. Every non-header page is encrypted with
258 /// XChaCha20-Poly1305: 4096-byte logical page → 4136-byte
259 /// physical page (24-byte nonce + 16-byte tag).
260 ///
261 /// When set on an **existing** database file:
262 /// - If the file is `format_minor = 2` (encryption-capable):
263 /// the key is used to derive the per-file page key via
264 /// `HKDF-SHA256(key, kdf_salt, b"obj-page-encryption-v1")`.
265 /// A wrong key surfaces as
266 /// [`Error::EncryptionKeyInvalid`](obj_core::Error::EncryptionKeyInvalid)
267 /// on the first encrypted page read.
268 /// - If the file is `format_minor < 2`: open returns
269 /// [`Error::EncryptionKeyMismatch`](obj_core::Error::EncryptionKeyMismatch).
270 ///
271 /// **Build-time requirement.** Encryption requires the
272 /// `encryption` Cargo feature on the `obj-db` crate (which
273 /// propagates to `obj-core/encryption`). A build WITHOUT the
274 /// feature that sets a key returns
275 /// [`Error::FormatFeatureUnsupported`](obj_core::Error::FormatFeatureUnsupported)
276 /// from `Db::open_with` (`feature = "encryption"`).
277 ///
278 /// The key is held in memory inside the obj-core pager
279 /// [`Config`](obj_core::pager::Config); the `Debug` impl
280 /// redacts it (`encryption_key: "<set>"`). The key is NOT
281 /// persisted to disk. Callers are responsible for the master
282 /// key's lifecycle (storage, rotation, derivation from a
283 /// passphrase via Argon2/scrypt, etc. — those are out of
284 /// scope for this issue).
285 ///
286 /// Zeroising the key on drop (via the `zeroize` crate) is a
287 /// known follow-up; it is not implemented here.
288 #[must_use]
289 pub fn encryption_key(self, key: [u8; 32]) -> Self {
290 Self {
291 pager: self.pager.with_encryption_key(Some(key)),
292 ..self
293 }
294 }
295
296 /// Enable / disable the cross-process file lock layer.
297 ///
298 /// When `false`, the [`Db`](crate::Db) opened with this config
299 /// does NOT acquire OS-level byte-range locks on the database
300 /// file. Used by the M6 #49 concurrent stress test where every
301 /// thread shares one `Db` (and therefore one file descriptor):
302 /// POSIX OFD locks are per-fd, so multiple threads on the same
303 /// fd cannot use the lock to enforce inter-thread exclusion —
304 /// that's what the in-process write-serialization mutex is for.
305 ///
306 /// Default: `true` (cross-process locking enabled).
307 #[must_use]
308 pub fn cross_process_lock(self, enabled: bool) -> Self {
309 Self {
310 cross_process_lock: enabled,
311 ..self
312 }
313 }
314}