lsm_tree/error.rs
1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2024-present, fjall-rs
3// Copyright (c) 2026-present, Structured World Foundation
4
5use crate::{Checksum, CompressionType};
6#[cfg(not(feature = "std"))]
7use alloc::string::String;
8
/// Represents errors that can occur in the LSM-tree.
///
/// Marked `#[non_exhaustive]`: matches outside this crate must include a
/// wildcard arm.
#[derive(Debug)]
#[non_exhaustive]
pub enum Error {
    /// I/O error
    Io(crate::io::Error),

    /// Decompression failed
    Decompress(CompressionType),

    /// Invalid or unparsable data format version
    InvalidVersion(u8),

    /// Some required files could not be recovered from disk
    ///
    /// Also the result of converting a `crate::sfa::Error` that reports an
    /// invalid archive header, an invalid archive version, or an unsupported
    /// checksum type.
    Unrecoverable,

    /// Checksum mismatch
    ///
    /// Also the result of converting `crate::sfa::Error::ChecksumMismatch`.
    ChecksumMismatch {
        /// Checksum of loaded block
        got: Checksum,

        /// Checksum that was saved in block header
        expected: Checksum,
    },

    /// A memtable entry's per-KV digest, computed at insert under
    /// [`KvChecksumComputePoint::AtInsert`](crate::runtime_config::KvChecksumComputePoint::AtInsert),
    /// did not match a recompute over the entry's current bytes at flush.
    ///
    /// This is the memtable-residence RAM-corruption signal: the entry's
    /// logical content (`value_type`, `seqno`, key, or value) changed while it
    /// sat in the memtable, between insert and flush. Distinct from
    /// [`Self::ChecksumMismatch`] (on-disk block bytes) — this catches a flip
    /// that happens entirely in RAM, before any block is written.
    MemtableKvChecksumMismatch {
        /// Sequence number of the entry whose digest diverged (locates it).
        seqno: u64,

        /// Digest recomputed over the entry's current memtable bytes at flush.
        got: u64,

        /// Digest computed and stored when the entry was inserted.
        expected: u64,
    },

    /// A memtable entry carried an insert-time per-KV digest
    /// ([`KvChecksumComputePoint::AtInsert`](crate::runtime_config::KvChecksumComputePoint::AtInsert))
    /// tagged with an algorithm `AtInsert` never stores: a non-4-byte or
    /// unknown algorithm wire tag.
    ///
    /// `AtInsert` only ever writes a 4-byte algorithm tag (`Xxh3Low32` /
    /// `Crc32c`), so a digest-bearing node tagged otherwise means the node's
    /// algorithm metadata was corrupted in RAM during memtable residence.
    /// Distinct from [`Self::MemtableKvChecksumMismatch`] (the digest value
    /// diverged): here the algorithm itself is unusable, so the engine refuses
    /// to "verify" the entry under the wrong algorithm rather than risk a
    /// flipped tag passing the residence check.
    MemtableKvChecksumCorruptAlgorithm {
        /// Sequence number of the entry whose algorithm tag is invalid.
        seqno: u64,

        /// The invalid algorithm wire tag read from the node.
        tag: u8,
    },

    /// Blob frame header CRC mismatch (V4 format).
    /// Distinct from `ChecksumMismatch` which covers data payload checksums.
    HeaderCrcMismatch {
        /// CRC recomputed from header fields
        recomputed: u32,

        /// CRC stored in the blob frame header
        stored: u32,
    },

    /// Invalid enum tag
    InvalidTag((&'static str, u8)),

    /// Invalid block trailer
    InvalidTrailer,

    /// Invalid block header
    InvalidHeader(&'static str),

    /// Data size (decompressed, on-disk, or requested) is invalid or exceeds a safety limit
    DecompressedSizeTooLarge {
        /// Size associated with the data being processed. This may come from
        /// on-disk/in-memory metadata (e.g., header, block/value handle) or be
        /// derived from caller input (e.g., a requested key or value length),
        /// and may be zero, invalid, or over the configured limit.
        declared: u64,

        /// Maximum allowed size for the data or request being processed
        limit: u64,
    },

    /// UTF-8 error
    Utf8(core::str::Utf8Error),

    /// Merge operator failed.
    ///
    /// No context payload — consistent with other unit variants
    /// (`Unrecoverable`, `InvalidTrailer`). Operators should log
    /// details before returning this error.
    MergeOperator,

    /// Encryption failed
    Encrypt(&'static str),

    /// Decryption failed
    Decrypt(&'static str),

    /// Comparator mismatch on tree reopen.
    ///
    /// The tree was created with a comparator whose [`crate::UserComparator::name`]
    /// differs from the one supplied at reopen time.
    ComparatorMismatch {
        /// Comparator name persisted in the tree metadata.
        stored: String,

        /// Comparator name supplied by the caller.
        supplied: &'static str,
    },

    /// Zstd dictionary required but not provided, or `dict_id` mismatch
    ZstdDictMismatch {
        /// Dictionary ID stored in the block/table metadata
        expected: u32,

        /// Dictionary ID provided by the caller (`None` if no dictionary supplied)
        got: Option<u32>,
    },

    /// Per-record XXH3-64 mismatch inside a framed manifest section
    /// (`tables` / `blob_files`). Distinct from
    /// [`Error::ChecksumMismatch`] — same XXH3 family but a
    /// different output width (XXH3-64 here vs XXH3-128 for
    /// block-level payloads) on a different layer of the on-disk
    /// format, with different recovery semantics (manifest framing
    /// errors are routed through `ManifestRecoveryMode`; block
    /// checksums surface via `Error::ChecksumMismatch` for the
    /// block I/O paths). Strict manifest recovery modes surface
    /// this so an operator can see the exact 64-bit digests that
    /// disagreed; `SkipAnyCorruptedRecords` and
    /// `PointInTimeRecovery` route around the corruption without
    /// raising it.
    ManifestFrameChecksumMismatch {
        /// SFA section the corrupt record was found in (e.g.
        /// `"tables"`, `"blob_files"`). Static so this can be
        /// compared without parsing message strings.
        section: &'static str,
        /// XXH3-64 digest the framing header claimed for the
        /// record's payload.
        expected: u64,
        /// XXH3-64 digest the reader recomputed over the bytes
        /// actually on disk.
        got: u64,
    },

    /// Range tombstone block decode failure.
    RangeTombstoneDecode {
        /// Which field or validation failed (e.g. `start_len`, `start`, `seqno`, `interval`)
        field: &'static str,

        /// Byte offset within the block to the start of the field whose decoding failed
        /// (captured before reading bytes for that field).
        offset: u64,
    },

    /// A [`WriteBatch`](crate::WriteBatch) contains mixed operation types
    /// (e.g. insert + remove) for the same user key.
    ///
    /// Mixed ops at the same logical version are rejected because the
    /// memtable/skiplist ordering ties on `(user_key, seqno)` and does not
    /// include `value_type` as a tie-breaker. That would otherwise make
    /// equal-key entries with different operation types ambiguous to later
    /// reads and merges, yielding tie-break-dependent "last write wins"
    /// semantics.
    MixedOperationBatch,

    /// Tree was opened with `Config::page_ecc(true)` but this build of
    /// the crate does not have the `page_ecc` cargo feature enabled.
    /// The reader has no way to verify or recover Reed-Solomon parity
    /// without the codec, so opening such a tree would silently
    /// downgrade integrity guarantees — return this error instead.
    PageEccUnsupported,

    /// Block payload failed the XXH3 integrity check and the
    /// attached Reed-Solomon parity trailer could not reconstruct
    /// it (more shards are corrupted than the (4, 2) RS scheme
    /// can recover). Surfaced ONLY by ECC-protected blocks
    /// (the `ECC_PARITY` header flag set); a block written without parity
    /// (`Config::page_ecc(false)`) on a checksum mismatch returns
    /// [`Self::ChecksumMismatch`] instead, because there's no
    /// parity to even attempt recovery from.
    PageEccUnrecoverable {
        /// XXH3 checksum recomputed from the on-disk bytes.
        got: Checksum,
        /// XXH3 checksum stored in the block header.
        expected: Checksum,
    },

    /// Route-compatibility mismatch on reopen.
    ///
    /// Recovery found fewer tables on disk than the manifest expects, and all
    /// missing tables are on levels not covered by any current
    /// [`level_routes`](crate::Config::level_routes). This typically means a
    /// previously configured route was removed, leaving its directory
    /// unreachable.
    ///
    /// Re-adding the missing route(s) will usually resolve the error. If
    /// missing tables are on levels that *are* covered by a current route,
    /// recovery returns [`Unrecoverable`](Self::Unrecoverable) instead
    /// (the SST files were genuinely lost).
    RouteMismatch {
        /// Number of tables listed in the manifest.
        expected: usize,

        /// Number of tables actually found across all configured routes.
        found: usize,
    },

    /// Valid configuration / on-disk layout that this build does not
    /// yet know how to process, or constructor input that violates a
    /// documented invariant (e.g. `CompressionType::None` passed to
    /// [`crate::table::block::CompressionContext::new`]). Distinct
    /// from [`Error::Unrecoverable`] (signals corruption) and from
    /// [`Error::Io`] with `ErrorKind::Unsupported` (which can also
    /// surface from platform / backend limits); the `&'static str`
    /// payload names the specific marker that triggered the rejection
    /// (e.g. `"filter_tli"` for a partitioned filter SFA section,
    /// `"compression-context-none"` for the constructor invariant) so
    /// the caller can route the diagnostic without parsing message
    /// strings.
    FeatureUnsupported(&'static str),

    /// The tree directory is already locked by another live instance.
    ///
    /// Returned by [`Config::open`](crate::Config::open) and
    /// [`Config::repair`](crate::Config::repair) when the cross-process
    /// directory lock (a `LOCK` file under the tree directory, held via an
    /// advisory OS file lock) could not be acquired because another process
    /// owns it. Holds the directory path as a display string for diagnostics.
    /// Two processes mutating the same manifest would corrupt it, so the second
    /// acquirer fails fast here. Disable the lock with
    /// [`Config::with_directory_lock`](crate::Config::with_directory_lock) only
    /// when exclusivity is enforced at a higher layer.
    Locked(String),

    /// Manifest footer / TOC / file-level discovery failure.
    ///
    /// Scoped to errors detected at or before the TOC is parsed —
    /// i.e. everything the reader needs *before* it can answer
    /// "where is section X". Section-content failures (a specific
    /// section's Block fails verification) go through
    /// [`ManifestSectionInvalid`](Error::ManifestSectionInvalid)
    /// instead so callers like `Tree::open` can distinguish a
    /// totally unreadable manifest from a per-section problem.
    ///
    /// Typical causes:
    ///
    /// - **Footer-payload structural failure:** unknown layout
    ///   version, oversized section count, empty/oversized section
    ///   name, invalid UTF-8, duplicate section name, footer
    ///   payload exceeds the 4 KiB reservation.
    /// - **Tail / head-mirror double failure:** both the
    ///   tail-footer Block read and the head-mirror fallback
    ///   failed verification (XXH3 mismatch, AEAD decryption,
    ///   parse error). Per-path causes are logged at `error`
    ///   level and collapsed here.
    /// - **TOC entry value corruption:** a TOC entry's
    ///   `block_offset + block_size` overflows `u64` or extends
    ///   past the end of the file. The TOC bytes are footer
    ///   payload, so a malformed TOC entry is a footer-level
    ///   issue even though it surfaces in
    ///   `ManifestArchiveReader::read_section`.
    /// - **Trailing size-hint corruption:** the tail's 4-byte
    ///   footer-size hint is zero or exceeds
    ///   `HEAD_FOOTER_RESERVED_SIZE` (4 KiB), or the implied
    ///   `section_end` lands inside the head reservation. Caught
    ///   in both the reader and `checkpoint::write_current_for_version`.
    /// - **Writer-side invariant breach:** `write_cursor` would
    ///   overflow `u64`, an in-memory section would exceed the
    ///   on-disk Block-size cap, etc.
    /// - **CURRENT pointer points at a missing manifest:** when
    ///   `version::get_current_version` opens the referenced
    ///   `v{N}` file and gets `NotFound`, the error is rewrapped
    ///   here so `Tree::open`'s outer `Io(NotFound) => create_new`
    ///   arm cannot mistake a half-applied recovery / corrupted
    ///   state for a clean first-open.
    ManifestFooterInvalid(&'static str),

    /// Manifest section content failed verification or matched no
    /// TOC entry.
    ///
    /// Surfaced by `ManifestArchiveReader::read_section` (and the
    /// helper that validates the inner Block header before
    /// delegating to `Block::from_reader`). Distinct from
    /// [`ManifestFooterInvalid`](Error::ManifestFooterInvalid)
    /// because the footer / TOC loaded fine — the bad bytes are
    /// localised to one section Block and a caller MAY route
    /// recovery differently (e.g. skip the section vs. refuse
    /// the whole manifest).
    ///
    /// Causes:
    ///
    /// - **Requested section name not in TOC:** the caller asked
    ///   for a section that the manifest doesn't declare.
    /// - **Section Block header doesn't fit its outer buffer:**
    ///   the inner block's derived on-disk size (header + payload +
    ///   parity-if-flagged) exceeds the TOC-declared `block_size`.
    ///   Defence-in-depth against a forged TOC pointing at a too-small
    ///   slot.
    /// - **Block decoded at the TOC offset has the wrong
    ///   `block_type`:** TOC says "section here" but the bytes
    ///   carry a non-`Manifest` Block. Defence-in-depth against
    ///   TOC-redirect attacks; once AAD-binding lands in
    ///   `encryption::block`, `Block::from_reader` will reject
    ///   this internally and the check here becomes belt-and-
    ///   braces.
    ManifestSectionInvalid(&'static str),

    /// The trailing record of the incremental manifest edit log is
    /// incomplete or corrupt, and the active
    /// [`ManifestRecoveryMode`](crate::config::ManifestRecoveryMode) does
    /// not tolerate that defect, so the open aborts rather than silently
    /// rolling the edit back.
    ///
    /// A clean end-of-log is never reported here: a crash exactly at a
    /// record boundary is byte-identical to a pristine close, so that
    /// case is always tolerated. This fires when bytes of a trailing
    /// record are present but the record fails framing — a
    /// power-loss-truncated append (only
    /// [`AbsoluteConsistency`](crate::config::ManifestRecoveryMode::AbsoluteConsistency)
    /// rejects it; other modes roll it back), or a fully-framed record
    /// whose checksum doesn't match (bit-rot) / whose header is forged
    /// (rejected by both `AbsoluteConsistency` and
    /// [`TolerateCorruptedTailRecords`](crate::config::ManifestRecoveryMode::TolerateCorruptedTailRecords),
    /// which salvages writer-incomplete tails only; rolled back under
    /// `PointInTimeRecovery` / `SkipAnyCorruptedRecords`).
    ///
    /// Recover by truncating the torn tail: run
    /// [`Config::repair`](crate::Config::repair), which rebuilds a clean
    /// standalone snapshot (dropping the edit log), or re-open under a
    /// [`ManifestRecoveryMode`](crate::config::ManifestRecoveryMode) that
    /// tolerates the defect to roll the trailing edit back.
    TornManifestEditLog {
        /// The trailing defect detected: `"truncated"` (partial record
        /// from a power-loss-interrupted append), `"checksum-mismatch"`
        /// (fully-framed record whose payload bit-rotted),
        /// `"bad-header"` (implausible framing length), or
        /// `"len-mismatch"` (record length disagrees with the expected
        /// fixed size). Static so callers can branch without parsing
        /// the message string.
        kind: &'static str,
    },

    /// A write was declined by the storage admission gate because accepting it
    /// could push the tree's live footprint past its effective budget.
    ///
    /// Only produced when [`storage_admission_check`](crate::runtime_config::RuntimeConfig::storage_admission_check)
    /// is enabled. The predicate is computed, not latched: raising
    /// [`storage_limit_bytes`](crate::runtime_config::RuntimeConfig::storage_limit_bytes),
    /// freeing disk, or a compaction reclaiming space clears the read-only
    /// state on the next check with no restart. Internal flush / compaction are
    /// never gated (reserved headroom), so the engine can always reclaim space.
    StorageFull {
        /// Live on-disk bytes at the time of the check.
        used: u64,

        /// Effective byte budget that `used` (plus reserved headroom) exceeded.
        limit: u64,
    },
}
383
384impl core::fmt::Display for Error {
385 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
386 write!(f, "LsmTreeError: {self:?}")
387 }
388}
389
390impl core::error::Error for Error {
391 fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
392 match self {
393 Self::Io(e) => Some(e),
394 _ => None,
395 }
396 }
397}
398
399impl From<crate::sfa::Error> for Error {
400 fn from(value: crate::sfa::Error) -> Self {
401 match value {
402 crate::sfa::Error::Io(e) => Self::from(e),
403 crate::sfa::Error::ChecksumMismatch { got, expected } => {
404 log::error!("Archive ToC checksum mismatch");
405 Self::ChecksumMismatch {
406 got: got.into(),
407 expected: expected.into(),
408 }
409 }
410 crate::sfa::Error::InvalidHeader => {
411 log::error!("Invalid archive header");
412 Self::Unrecoverable
413 }
414 crate::sfa::Error::InvalidVersion => {
415 log::error!("Invalid archive version");
416 Self::Unrecoverable
417 }
418 crate::sfa::Error::UnsupportedChecksumType => {
419 log::error!("Invalid archive checksum type");
420 Self::Unrecoverable
421 }
422 }
423 }
424}
425
426// The `Io` variant carries `crate::io::Error` (the no_std-capable I/O error),
427// so this bridge is a direct wrap. Std file-I/O paths surface `std::io::Error`;
428// the std-gated bridge below folds those through `crate::io::Error`.
429impl From<crate::io::Error> for Error {
430 fn from(value: crate::io::Error) -> Self {
431 Self::Io(value)
432 }
433}
434
// Std-only bridge: converts a `std::io::Error` into `crate::io::Error` and
// wraps the result in the `Io` variant.
#[cfg(feature = "std")]
impl From<std::io::Error> for Error {
    fn from(value: std::io::Error) -> Self {
        let bridged: crate::io::Error = value.into();
        Self::Io(bridged)
    }
}
441
/// Tree result: shorthand for [`core::result::Result`] with [`Error`] as the
/// error type.
pub type Result<T> = core::result::Result<T, Error>;