nodedb_fts/index/error.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! Typed errors for `FtsIndex` operations.
4//!
5//! `FtsIndexError<E>` wraps both FTS-layer errors (e.g. surrogate out of range)
6//! and backend storage errors (`E = B::Error`). Callers that only use in-memory
7//! backends (tests, WASM) will have `E = std::convert::Infallible`.
8
9use thiserror::Error;
10
11use nodedb_types::Surrogate;
12
13use crate::search::query_parser::InvalidQuery;
14
15use nodedb_mem::MemError;
16
/// Largest raw `Surrogate` value accepted for indexing.
///
/// Two storage details motivate a ceiling:
///
/// - On disk, FTS posting blocks encode doc IDs as delta-encoded, bitpacked
///   `u32` values.
/// - In memory, the memtable indexes its per-doc fieldnorm arrays directly by
///   the surrogate's raw `u32` (`fieldnorms[surrogate.0 as usize]`), so a
///   surrogate close to `u32::MAX` would force that array to grow to roughly
///   4 GiB and exhaust process memory.
///
/// The ceiling is `u32::MAX - 1`, matching the graph CSR node-id policy.
/// `u32::MAX` stays reserved so that an "invalid" sentinel can be represented
/// in the `u32` space without aliasing a real document. `Surrogate::ZERO`
/// (value 0) is rejected at indexing time as well, because the Control Plane
/// uses it as the "unassigned" marker.
///
/// Roughly 4 billion documents per FTS index per collection is far beyond any
/// practical workload; a collection approaching this limit should be
/// partitioned instead.
pub const MAX_INDEXABLE_SURROGATE: u32 = u32::MAX - 1;
34
35/// Errors returned by `FtsIndex` write operations.
36///
37/// `E` is the backend error type (`B::Error`). Backend errors are wrapped in
38/// `FtsIndexError::Backend` so callers get a single error type.
39#[derive(Debug, Error)]
40#[non_exhaustive]
41pub enum FtsIndexError<E: std::fmt::Display> {
42 /// The supplied `Surrogate` is outside the indexable range `1..=MAX_INDEXABLE_SURROGATE`.
43 ///
44 /// Either the zero sentinel (`Surrogate::ZERO`, meaning "not yet assigned")
45 /// or a value at `u32::MAX` was passed to `index_document`. The Control
46 /// Plane surrogate allocator must ensure surrogates are in range before
47 /// dispatching indexing operations.
48 #[error(
49 "surrogate {surrogate} is out of the indexable range \
50 1..={MAX_INDEXABLE_SURROGATE}; \
51 the zero value is the unassigned sentinel and u32::MAX is reserved"
52 )]
53 SurrogateOutOfRange { surrogate: Surrogate },
54
55 /// A term in the document exceeds the on-disk `u16` length cap.
56 ///
57 /// The FTS segment format encodes term lengths as `u16` (see
58 /// `lsm/segment/format.rs::MAX_TERM_LEN`). Terms longer than
59 /// `u16::MAX` (65 535 bytes) cannot be persisted. After analysis,
60 /// real-world terms are typically 2-20 bytes — exceeding this cap
61 /// indicates a malformed analyzer or adversarial input.
62 #[error("term length {len} exceeds maximum {max} bytes (FTS segment format limit)")]
63 TermTooLong { len: usize, max: usize },
64
65 /// The FTS query string is invalid (e.g. NOT-only, unsupported parentheses).
66 #[error("invalid FTS query: {0}")]
67 InvalidQuery(#[from] InvalidQuery),
68
69 /// An underlying backend storage operation failed.
70 #[error("FTS backend error: {0}")]
71 Backend(E),
72
73 /// A segment I/O or validation error not otherwise classified.
74 ///
75 /// Read-side variants (`BadMagic`, `UnsupportedVersion`, `ChecksumMismatch`,
76 /// `Truncated`) are not expected on the write/flush path but are propagated
77 /// here rather than panicking, so corrupt-state surprises surface as typed
78 /// errors at the public API boundary.
79 #[error("FTS segment error: {0}")]
80 Segment(crate::lsm::segment::error::SegmentError),
81
82 /// Memory budget exhausted for the FTS engine.
83 ///
84 /// The operation requires more memory than the engine's remaining budget
85 /// allows. Callers should backpressure, spill, or reject the request.
86 #[error("FTS memory budget exhausted: {0}")]
87 BudgetExhausted(MemError),
88}
89
90impl<E: std::fmt::Display> From<crate::lsm::segment::error::SegmentError> for FtsIndexError<E> {
91 fn from(err: crate::lsm::segment::error::SegmentError) -> Self {
92 use crate::lsm::segment::error::SegmentError;
93 match err {
94 SegmentError::TermTooLong { term_len, max } => {
95 FtsIndexError::TermTooLong { len: term_len, max }
96 }
97 other => FtsIndexError::Segment(other),
98 }
99 }
100}
101
102impl<E: std::fmt::Display> FtsIndexError<E> {
103 /// Wrap a backend error.
104 pub(crate) fn backend(e: E) -> Self {
105 Self::Backend(e)
106 }
107}
108
109impl<E: std::fmt::Display> From<MemError> for FtsIndexError<E> {
110 fn from(e: MemError) -> Self {
111 Self::BudgetExhausted(e)
112 }
113}