Skip to main content

nodedb_fts/index/
error.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Typed errors for `FtsIndex` operations.
4//!
5//! `FtsIndexError<E>` wraps both FTS-layer errors (e.g. surrogate out of range)
6//! and backend storage errors (`E = B::Error`). Callers that only use in-memory
7//! backends (tests, WASM) will have `E = std::convert::Infallible`.
8
9use thiserror::Error;
10
11use nodedb_types::Surrogate;
12
13use crate::search::query_parser::InvalidQuery;
14
15use nodedb_mem::MemError;
16
/// Largest `Surrogate` value the FTS index accepts: `u32::MAX - 1`.
///
/// Why a cap exists at all:
///
/// - On disk, posting blocks persist doc IDs as `u32` (delta-encoded and
///   bitpacked).
/// - In memory, the memtable treats the surrogate's raw `u32` as a direct
///   index into the per-doc fieldnorm arrays
///   (`fieldnorms[surrogate.0 as usize]`), so a surrogate close to `u32::MAX`
///   would force that array to grow to roughly 4 GiB and exhaust process
///   memory.
///
/// Why this particular value:
///
/// - It matches the ceiling used by the graph CSR node-id policy.
/// - `u32::MAX` stays free so that "invalid" sentinels can be expressed in the
///   `u32` space without colliding with a real document.
///
/// The lower end of the range is guarded separately: `Surrogate::ZERO`
/// (value 0) is the Control Plane's "unassigned" marker and is rejected at
/// indexing time.
///
/// Roughly 4 billion documents per FTS index per collection is far more than
/// any practical workload needs; a collection that gets close to the limit
/// should be partitioned instead.
///
/// NOTE(review): this cap excludes only the single value `u32::MAX`, so on its
/// own it does not bound the fieldnorm allocation described above — confirm
/// that large-but-valid surrogates are limited elsewhere (e.g. by the memory
/// budget).
pub const MAX_INDEXABLE_SURROGATE: u32 = u32::MAX - 1;
34
35/// Errors returned by `FtsIndex` write operations.
36///
37/// `E` is the backend error type (`B::Error`). Backend errors are wrapped in
38/// `FtsIndexError::Backend` so callers get a single error type.
39#[derive(Debug, Error)]
40#[non_exhaustive]
41pub enum FtsIndexError<E: std::fmt::Display> {
42    /// The supplied `Surrogate` is outside the indexable range `1..=MAX_INDEXABLE_SURROGATE`.
43    ///
44    /// Either the zero sentinel (`Surrogate::ZERO`, meaning "not yet assigned")
45    /// or a value at `u32::MAX` was passed to `index_document`. The Control
46    /// Plane surrogate allocator must ensure surrogates are in range before
47    /// dispatching indexing operations.
48    #[error(
49        "surrogate {surrogate} is out of the indexable range \
50         1..={MAX_INDEXABLE_SURROGATE}; \
51         the zero value is the unassigned sentinel and u32::MAX is reserved"
52    )]
53    SurrogateOutOfRange { surrogate: Surrogate },
54
55    /// A term in the document exceeds the on-disk `u16` length cap.
56    ///
57    /// The FTS segment format encodes term lengths as `u16` (see
58    /// `lsm/segment/format.rs::MAX_TERM_LEN`). Terms longer than
59    /// `u16::MAX` (65 535 bytes) cannot be persisted. After analysis,
60    /// real-world terms are typically 2-20 bytes — exceeding this cap
61    /// indicates a malformed analyzer or adversarial input.
62    #[error("term length {len} exceeds maximum {max} bytes (FTS segment format limit)")]
63    TermTooLong { len: usize, max: usize },
64
65    /// The FTS query string is invalid (e.g. NOT-only, unsupported parentheses).
66    #[error("invalid FTS query: {0}")]
67    InvalidQuery(#[from] InvalidQuery),
68
69    /// An underlying backend storage operation failed.
70    #[error("FTS backend error: {0}")]
71    Backend(E),
72
73    /// A segment I/O or validation error not otherwise classified.
74    ///
75    /// Read-side variants (`BadMagic`, `UnsupportedVersion`, `ChecksumMismatch`,
76    /// `Truncated`) are not expected on the write/flush path but are propagated
77    /// here rather than panicking, so corrupt-state surprises surface as typed
78    /// errors at the public API boundary.
79    #[error("FTS segment error: {0}")]
80    Segment(crate::lsm::segment::error::SegmentError),
81
82    /// Memory budget exhausted for the FTS engine.
83    ///
84    /// The operation requires more memory than the engine's remaining budget
85    /// allows. Callers should backpressure, spill, or reject the request.
86    #[error("FTS memory budget exhausted: {0}")]
87    BudgetExhausted(MemError),
88}
89
90impl<E: std::fmt::Display> From<crate::lsm::segment::error::SegmentError> for FtsIndexError<E> {
91    fn from(err: crate::lsm::segment::error::SegmentError) -> Self {
92        use crate::lsm::segment::error::SegmentError;
93        match err {
94            SegmentError::TermTooLong { term_len, max } => {
95                FtsIndexError::TermTooLong { len: term_len, max }
96            }
97            other => FtsIndexError::Segment(other),
98        }
99    }
100}
101
102impl<E: std::fmt::Display> FtsIndexError<E> {
103    /// Wrap a backend error.
104    pub(crate) fn backend(e: E) -> Self {
105        Self::Backend(e)
106    }
107}
108
109impl<E: std::fmt::Display> From<MemError> for FtsIndexError<E> {
110    fn from(e: MemError) -> Self {
111        Self::BudgetExhausted(e)
112    }
113}