Skip to main content

lsm_tree/
error.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright (c) 2024-present, fjall-rs
3// Copyright (c) 2026-present, Structured World Foundation
4
5use crate::{Checksum, CompressionType};
6#[cfg(not(feature = "std"))]
7use alloc::string::String;
8
/// Represents errors that can occur in the LSM-tree.
///
/// The enum is `#[non_exhaustive]`: new variants may be added, so code matching
/// on it from outside the crate needs a wildcard arm.
#[derive(Debug)]
#[non_exhaustive]
pub enum Error {
    /// I/O error, carried as the crate's own [`crate::io::Error`] type
    Io(crate::io::Error),

    /// Decompression failed; the payload is a [`CompressionType`]
    /// (presumably the codec that was being decoded — confirm at the error sites)
    Decompress(CompressionType),

    /// Invalid or unparsable data format version
    InvalidVersion(u8),

    /// Some required files could not be recovered from disk
    Unrecoverable,

    /// Checksum mismatch
    ChecksumMismatch {
        /// Checksum of loaded block
        got: Checksum,

        /// Checksum that was saved in block header
        expected: Checksum,
    },

    /// A memtable entry's per-KV digest, computed at insert under
    /// [`KvChecksumComputePoint::AtInsert`](crate::runtime_config::KvChecksumComputePoint::AtInsert),
    /// did not match a recompute over the entry's current bytes at flush.
    ///
    /// This is the memtable-residence RAM-corruption signal: the entry's
    /// logical content (`value_type`, `seqno`, key, or value) changed while it
    /// sat in the memtable, between insert and flush. Distinct from
    /// [`Self::ChecksumMismatch`] (on-disk block bytes) — this catches a flip
    /// that happens entirely in RAM, before any block is written.
    MemtableKvChecksumMismatch {
        /// Sequence number of the entry whose digest diverged (locates it).
        seqno: u64,

        /// Digest recomputed over the entry's current memtable bytes at flush.
        got: u64,

        /// Digest computed and stored when the entry was inserted.
        expected: u64,
    },

    /// A memtable entry carried an insert-time per-KV digest
    /// ([`KvChecksumComputePoint::AtInsert`](crate::runtime_config::KvChecksumComputePoint::AtInsert))
    /// tagged with an algorithm `AtInsert` never stores: a non-4-byte or
    /// unknown algorithm wire tag.
    ///
    /// `AtInsert` only ever writes a 4-byte algorithm tag (`Xxh3Low32` /
    /// `Crc32c`), so a digest-bearing node tagged otherwise means the node's
    /// algorithm metadata was corrupted in RAM during memtable residence.
    /// Distinct from [`Self::MemtableKvChecksumMismatch`] (the digest value
    /// diverged): here the algorithm itself is unusable, so the engine refuses
    /// to "verify" the entry under the wrong algorithm rather than risk a
    /// flipped tag passing the residence check.
    MemtableKvChecksumCorruptAlgorithm {
        /// Sequence number of the entry whose algorithm tag is invalid.
        seqno: u64,

        /// The invalid algorithm wire tag read from the node.
        tag: u8,
    },

    /// Blob frame header CRC mismatch (V4 format).
    /// Distinct from `ChecksumMismatch` which covers data payload checksums.
    HeaderCrcMismatch {
        /// CRC recomputed from header fields
        recomputed: u32,

        /// CRC stored in the blob frame header
        stored: u32,
    },

    /// Invalid enum tag
    ///
    /// The payload is a `(&'static str, u8)` pair — presumably the name of the
    /// enum being decoded and the offending tag byte (confirm at the error sites).
    InvalidTag((&'static str, u8)),

    /// Invalid block trailer
    InvalidTrailer,

    /// Invalid block header; the `&'static str` payload is a static string
    /// supplied at the error site (NOTE(review): confirm what it names)
    InvalidHeader(&'static str),

    /// Data size (decompressed, on-disk, or requested) is invalid or exceeds a safety limit
    DecompressedSizeTooLarge {
        /// Size associated with the data being processed. This may come from
        /// on-disk/in-memory metadata (e.g., header, block/value handle) or be
        /// derived from caller input (e.g., a requested key or value length),
        /// and may be zero, invalid, or over the configured limit.
        declared: u64,

        /// Maximum allowed size for the data or request being processed
        limit: u64,
    },

    /// UTF-8 error
    Utf8(core::str::Utf8Error),

    /// Merge operator failed.
    ///
    /// No context payload — consistent with other unit variants
    /// (`Unrecoverable`, `InvalidTrailer`). Operators should log
    /// details before returning this error.
    MergeOperator,

    /// Encryption failed; the `&'static str` payload is a static string
    /// supplied at the failure site
    Encrypt(&'static str),

    /// Decryption failed; the `&'static str` payload is a static string
    /// supplied at the failure site
    Decrypt(&'static str),

    /// Comparator mismatch on tree reopen.
    ///
    /// The tree was created with a comparator whose [`crate::UserComparator::name`]
    /// differs from the one supplied at reopen time.
    ComparatorMismatch {
        /// Comparator name persisted in the tree metadata.
        stored: String,

        /// Comparator name supplied by the caller.
        supplied: &'static str,
    },

    /// Zstd dictionary required but not provided, or `dict_id` mismatch
    ZstdDictMismatch {
        /// Dictionary ID stored in the block/table metadata
        expected: u32,

        /// Dictionary ID provided by the caller (`None` if no dictionary supplied)
        got: Option<u32>,
    },

    /// Per-record XXH3-64 mismatch inside a framed manifest section
    /// (`tables` / `blob_files`). Distinct from
    /// [`Error::ChecksumMismatch`] — same XXH3 family but a
    /// different output width (XXH3-64 here vs XXH3-128 for
    /// block-level payloads) on a different layer of the on-disk
    /// format, with different recovery semantics (manifest framing
    /// surfaces routed through `ManifestRecoveryMode`; block
    /// checksums surface via `Error::ChecksumMismatch` for the
    /// block I/O paths). Strict manifest recovery modes surface
    /// this so an operator can see the exact 64-bit digests that
    /// disagreed; `SkipAnyCorruptedRecords` and
    /// `PointInTimeRecovery` route around the corruption without
    /// raising it.
    ManifestFrameChecksumMismatch {
        /// SFA section the corrupt record was found in (e.g.
        /// `"tables"`, `"blob_files"`). Static so this can be
        /// compared without parsing message strings.
        section: &'static str,
        /// XXH3-64 digest the framing header claimed for the
        /// record's payload.
        expected: u64,
        /// XXH3-64 digest the reader recomputed over the bytes
        /// actually on disk.
        got: u64,
    },

    /// Range tombstone block decode failure.
    RangeTombstoneDecode {
        /// Which field or validation failed (e.g. `start_len`, `start`, `seqno`, `interval`)
        field: &'static str,

        /// Byte offset within the block to the start of the field whose decoding failed
        /// (captured before reading bytes for that field).
        offset: u64,
    },

    /// A [`WriteBatch`](crate::WriteBatch) contains mixed operation types
    /// (e.g. insert + remove) for the same user key.
    ///
    /// Mixed ops at the same logical version are rejected because the
    /// memtable/skiplist ordering ties on `(user_key, seqno)` and does not
    /// include `value_type` as a tie-breaker. That would otherwise make
    /// equal-key entries with different operation types ambiguous to later
    /// reads and merges, yielding tie-break-dependent "last write wins"
    /// semantics.
    MixedOperationBatch,

    /// Tree was opened with `Config::page_ecc(true)` but this build of
    /// the crate does not have the `page_ecc` cargo feature enabled.
    /// The reader has no way to verify or recover Reed-Solomon parity
    /// without the codec, so opening such a tree would silently
    /// downgrade integrity guarantees — return this error instead.
    PageEccUnsupported,

    /// Block payload failed the XXH3 integrity check and the
    /// attached Reed-Solomon parity trailer could not reconstruct
    /// it (more shards are corrupted than the (4, 2) RS scheme
    /// can recover). Surfaced ONLY by ECC-protected blocks
    /// (the `ECC_PARITY` header flag set); a block written without parity
    /// (`Config::page_ecc(false)`) on a checksum mismatch returns
    /// [`Self::ChecksumMismatch`] instead, because there's no
    /// parity to even attempt recovery from.
    PageEccUnrecoverable {
        /// XXH3 checksum recomputed from the on-disk bytes.
        got: Checksum,
        /// XXH3 checksum stored in the block header.
        expected: Checksum,
    },

    /// Route-compatibility mismatch on reopen.
    ///
    /// Recovery found fewer tables on disk than the manifest expects, and all
    /// missing tables are on levels not covered by any current
    /// [`level_routes`](crate::Config::level_routes).  This typically means a
    /// previously configured route was removed, leaving its directory
    /// unreachable.
    ///
    /// Re-adding the missing route(s) will usually resolve the error.  If
    /// missing tables are on levels that *are* covered by a current route,
    /// recovery returns [`Unrecoverable`](Self::Unrecoverable) instead
    /// (the SST files were genuinely lost).
    RouteMismatch {
        /// Number of tables listed in the manifest.
        expected: usize,

        /// Number of tables actually found across all configured routes.
        found: usize,
    },

    /// Valid configuration / on-disk layout that this build does not
    /// yet know how to process, or constructor input that violates a
    /// documented invariant (e.g. `CompressionType::None` passed to
    /// [`crate::table::block::CompressionContext::new`]). Distinct
    /// from [`Error::Unrecoverable`] (signals corruption) and from
    /// [`Error::Io`] with `ErrorKind::Unsupported` (which can also
    /// surface from platform / backend limits); the `&'static str`
    /// payload names the specific marker that triggered the rejection
    /// (e.g. `"filter_tli"` for a partitioned filter SFA section,
    /// `"compression-context-none"` for the constructor invariant) so
    /// the caller can route the diagnostic without parsing message
    /// strings.
    FeatureUnsupported(&'static str),

    /// The tree directory is already locked by another live instance.
    ///
    /// Returned by [`Config::open`](crate::Config::open) and
    /// [`Config::repair`](crate::Config::repair) when the cross-process
    /// directory lock (a `LOCK` file under the tree directory, held via an
    /// advisory OS file lock) could not be acquired because another process
    /// owns it. Holds the directory path as a display string for diagnostics.
    /// Two processes mutating the same manifest would corrupt it, so the second
    /// acquirer fails fast here. Disable the lock with
    /// [`Config::with_directory_lock`](crate::Config::with_directory_lock) only
    /// when exclusivity is enforced at a higher layer.
    Locked(String),

    /// Manifest footer / TOC / file-level discovery failure.
    ///
    /// Scoped to errors detected at or before the TOC is parsed —
    /// i.e. everything the reader needs *before* it can answer
    /// "where is section X". Section-content failures (a specific
    /// section's Block fails verification) go through
    /// [`ManifestSectionInvalid`](Error::ManifestSectionInvalid)
    /// instead so callers like `Tree::open` can distinguish a
    /// totally unreadable manifest from a per-section problem.
    ///
    /// Typical causes:
    ///
    /// - **Footer-payload structural failure:** unknown layout
    ///   version, oversized section count, empty/oversized section
    ///   name, invalid UTF-8, duplicate section name, footer
    ///   payload exceeds the 4 KiB reservation.
    /// - **Tail / head-mirror double failure:** both the
    ///   tail-footer Block read and the head-mirror fallback
    ///   failed verification (XXH3 mismatch, AEAD decryption,
    ///   parse error). Per-path causes are logged at `error`
    ///   level and collapsed here.
    /// - **TOC entry value corruption:** a TOC entry's
    ///   `block_offset + block_size` overflows `u64` or extends
    ///   past the end of the file. The TOC bytes are footer
    ///   payload, so a malformed TOC entry is a footer-level
    ///   issue even though it surfaces in
    ///   `ManifestArchiveReader::read_section`.
    /// - **Trailing size-hint corruption:** the tail's 4-byte
    ///   footer-size hint is zero or exceeds
    ///   `HEAD_FOOTER_RESERVED_SIZE` (4 KiB), or the implied
    ///   `section_end` lands inside the head reservation. Caught
    ///   in both the reader and `checkpoint::write_current_for_version`.
    /// - **Writer-side invariant breach:** `write_cursor` would
    ///   overflow `u64`, an in-memory section would exceed the
    ///   on-disk Block-size cap, etc.
    /// - **CURRENT pointer points at a missing manifest:** when
    ///   `version::get_current_version` opens the referenced
    ///   `v{N}` file and gets `NotFound`, the error is rewrapped
    ///   here so `Tree::open`'s outer `Io(NotFound) => create_new`
    ///   arm cannot mistake a half-applied recovery / corrupted
    ///   state for a clean first-open.
    ManifestFooterInvalid(&'static str),

    /// Manifest section content failed verification or matched no
    /// TOC entry.
    ///
    /// Surfaced by `ManifestArchiveReader::read_section` (and the
    /// helper that validates the inner Block header before
    /// delegating to `Block::from_reader`). Distinct from
    /// [`ManifestFooterInvalid`](Error::ManifestFooterInvalid)
    /// because the footer / TOC loaded fine — the bad bytes are
    /// localised to one section Block and a caller MAY route
    /// recovery differently (e.g. skip the section vs. refuse
    /// the whole manifest).
    ///
    /// Causes:
    ///
    /// - **Requested section name not in TOC:** the caller asked
    ///   for a section that the manifest doesn't declare.
    /// - **Section Block header doesn't fit its outer buffer:**
    ///   the inner block's derived on-disk size (header + payload +
    ///   parity-if-flagged) exceeds the TOC-declared `block_size`.
    ///   Defence-in-depth against a forged TOC pointing at a too-small
    ///   slot.
    /// - **Block decoded at the TOC offset has the wrong
    ///   `block_type`:** TOC says "section here" but the bytes
    ///   carry a non-`Manifest` Block. Defence-in-depth against
    ///   TOC-redirect attacks; once AAD-binding lands in
    ///   `encryption::block`, `Block::from_reader` will reject
    ///   this internally and the check here becomes belt-and-
    ///   braces.
    ManifestSectionInvalid(&'static str),

    /// The trailing record of the incremental manifest edit log is
    /// incomplete or corrupt, and the active
    /// [`ManifestRecoveryMode`](crate::config::ManifestRecoveryMode) does
    /// not tolerate that defect, so the open aborts rather than silently
    /// rolling the edit back.
    ///
    /// A clean end-of-log is never reported here: a crash exactly at a
    /// record boundary is byte-identical to a pristine close, so that
    /// case is always tolerated. This fires when bytes of a trailing
    /// record are present but the record fails framing — a
    /// power-loss-truncated append (only
    /// [`AbsoluteConsistency`](crate::config::ManifestRecoveryMode::AbsoluteConsistency)
    /// rejects it; other modes roll it back), or a fully-framed record
    /// whose checksum doesn't match (bit-rot) / whose header is forged
    /// (rejected by both `AbsoluteConsistency` and
    /// [`TolerateCorruptedTailRecords`](crate::config::ManifestRecoveryMode::TolerateCorruptedTailRecords),
    /// which salvages writer-incomplete tails only; rolled back under
    /// `PointInTimeRecovery` / `SkipAnyCorruptedRecords`).
    ///
    /// Recover by truncating the torn tail: run
    /// [`Config::repair`](crate::Config::repair), which rebuilds a clean
    /// standalone snapshot (dropping the edit log), or re-open under a
    /// [`ManifestRecoveryMode`](crate::config::ManifestRecoveryMode) that
    /// tolerates the defect to roll the trailing edit back.
    TornManifestEditLog {
        /// The trailing defect detected: `"truncated"` (partial record
        /// from a power-loss-interrupted append), `"checksum-mismatch"`
        /// (fully-framed record whose payload bit-rotted),
        /// `"bad-header"` (implausible framing length), or
        /// `"len-mismatch"` (record length disagrees with the expected
        /// fixed size). Static so callers can branch without parsing
        /// the message string.
        kind: &'static str,
    },

    /// A write was declined by the storage admission gate because accepting it
    /// could push the tree's live footprint past its effective budget.
    ///
    /// Only produced when [`storage_admission_check`](crate::runtime_config::RuntimeConfig::storage_admission_check)
    /// is enabled. The predicate is computed, not latched: raising
    /// [`storage_limit_bytes`](crate::runtime_config::RuntimeConfig::storage_limit_bytes),
    /// freeing disk, or a compaction reclaiming space clears the read-only
    /// state on the next check with no restart. Internal flush / compaction are
    /// never gated (reserved headroom), so the engine can always reclaim space.
    StorageFull {
        /// Live on-disk bytes at the time of the check.
        used: u64,

        /// Effective byte budget that `used` (plus reserved headroom) exceeded.
        limit: u64,
    },
}
383
384impl core::fmt::Display for Error {
385    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
386        write!(f, "LsmTreeError: {self:?}")
387    }
388}
389
390impl core::error::Error for Error {
391    fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
392        match self {
393            Self::Io(e) => Some(e),
394            _ => None,
395        }
396    }
397}
398
399impl From<crate::sfa::Error> for Error {
400    fn from(value: crate::sfa::Error) -> Self {
401        match value {
402            crate::sfa::Error::Io(e) => Self::from(e),
403            crate::sfa::Error::ChecksumMismatch { got, expected } => {
404                log::error!("Archive ToC checksum mismatch");
405                Self::ChecksumMismatch {
406                    got: got.into(),
407                    expected: expected.into(),
408                }
409            }
410            crate::sfa::Error::InvalidHeader => {
411                log::error!("Invalid archive header");
412                Self::Unrecoverable
413            }
414            crate::sfa::Error::InvalidVersion => {
415                log::error!("Invalid archive version");
416                Self::Unrecoverable
417            }
418            crate::sfa::Error::UnsupportedChecksumType => {
419                log::error!("Invalid archive checksum type");
420                Self::Unrecoverable
421            }
422        }
423    }
424}
425
426// The `Io` variant carries `crate::io::Error` (the no_std-capable I/O error),
427// so this bridge is a direct wrap. Std file-I/O paths surface `std::io::Error`;
428// the std-gated bridge below folds those through `crate::io::Error`.
429impl From<crate::io::Error> for Error {
430    fn from(value: crate::io::Error) -> Self {
431        Self::Io(value)
432    }
433}
434
#[cfg(feature = "std")]
impl From<std::io::Error> for Error {
    /// Converts a `std::io::Error` into the crate's own I/O error type and
    /// wraps the result in [`Error::Io`]. Only compiled with the `std` feature.
    fn from(value: std::io::Error) -> Self {
        let bridged: crate::io::Error = value.into();
        Self::Io(bridged)
    }
}
441
/// Tree result: an alias for [`core::result::Result`] with the error type
/// fixed to this module's [`Error`].
pub type Result<T> = core::result::Result<T, Error>;