Skip to main content

tinyquant_core/backend/
protocol.rs

1//! `SearchBackend` trait and `SearchResult` type.
2//!
3//! Every search backend — brute-force, pgvector, HNSW — implements
4//! [`SearchBackend`]. [`SearchResult`] is the return element from
5//! [`SearchBackend::search`]; it orders descending by score so that
6//! a max-heap or sort gives the nearest-first ranking callers expect.
7//!
8//! See `docs/design/behavior-layer/backend-protocol.md` for the full
9//! behavioural contract and Python-parity notes.
10
11use alloc::vec::Vec;
12
13use crate::errors::BackendError;
14use crate::types::VectorId;
15
16/// A single item returned by [`SearchBackend::search`].
17///
18/// `score` is the cosine similarity between the query and the stored
19/// vector.  Values range from −1.0 to 1.0; higher is better.  NaN
20/// inputs are mapped to `Equal` in the `Ord` impl so that a stable sort
21/// degrades gracefully rather than panicking.
22#[derive(Clone, Debug)]
23pub struct SearchResult {
24    /// The identifier of the matching stored vector.
25    pub vector_id: VectorId,
26    /// Cosine similarity score in [−1, 1].  Higher is better.
27    pub score: f32,
28}
29
30impl Ord for SearchResult {
31    /// Descending by score (highest score sorts first).
32    ///
33    /// NaN maps to `Equal` via `unwrap_or` so a stable sort degrades
34    /// gracefully rather than producing undefined behaviour.  Vector id is
35    /// used as a tiebreaker (ascending) so that `Ord` agrees with `PartialEq`
36    /// — the contract requires `a == b` iff `a.cmp(b) == Equal`.
37    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
38        // Descending by score; NaN collapses to Equal (stable sort preserves insertion order).
39        // Then tiebreak by vector_id ascending so Ord agrees with PartialEq.
40        other
41            .score
42            .partial_cmp(&self.score)
43            .unwrap_or(core::cmp::Ordering::Equal)
44            .then_with(|| self.vector_id.cmp(&other.vector_id))
45    }
46}
47
48impl PartialOrd for SearchResult {
49    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
50        Some(self.cmp(other))
51    }
52}
53
54impl PartialEq for SearchResult {
55    fn eq(&self, other: &Self) -> bool {
56        self.cmp(other) == core::cmp::Ordering::Equal
57    }
58}
59
60impl Eq for SearchResult {}
61
62/// The public contract every search backend must satisfy.
63///
64/// Implementors hold a mutable collection of `(VectorId, Vec<f32>)` pairs
65/// and expose a uniform ingest / search / remove interface.  Backends may
66/// be in-process (brute-force) or remote (pgvector); the trait hides the
67/// difference from callers.
68///
69/// # Thread safety
70///
71/// The trait requires `Send` so that backends can be moved between threads,
72/// e.g. wrapped in a `Mutex<dyn SearchBackend>`.  Individual implementations
73/// may add `Sync` if they wish.
74///
75/// # Errors
76///
77/// All methods return `Result<_, BackendError>`.  The caller decides whether
78/// to treat `BackendError::Empty` as a soft empty result or a hard failure.
79pub trait SearchBackend: Send {
80    /// Store the supplied `(VectorId, Vec<f32>)` pairs.
81    ///
82    /// - Dimension-locks on the first non-empty call; subsequent calls with a
83    ///   different dimension return
84    ///   `Err(BackendError::Adapter("dimension mismatch: expected {d}, got {got}"))`.
85    /// - Duplicate `VectorId`s overwrite the stored vector silently (Python
86    ///   parity).
87    /// - An empty slice is a no-op returning `Ok(())`.
88    ///
89    /// # Errors
90    ///
91    /// Returns `Err(BackendError::Adapter(_))` if the vectors' dimension does
92    /// not match the dimension locked by a previous `ingest` call.
93    fn ingest(&mut self, vectors: &[(VectorId, Vec<f32>)]) -> Result<(), BackendError>;
94
95    /// Return the `top_k` nearest vectors to `query` by cosine similarity,
96    /// sorted descending.
97    ///
98    /// - `top_k == 0` → `Err(BackendError::InvalidTopK)`.
99    /// - Empty backend → `Ok(vec![])`.
100    /// - If the backend contains fewer than `top_k` vectors, returns all of
101    ///   them sorted descending.
102    ///
103    /// # Errors
104    ///
105    /// Returns `Err(BackendError::InvalidTopK)` when `top_k == 0`.
106    /// Returns `Err(BackendError::Adapter(_))` when the query dimension does
107    /// not match the locked dimension, or on adapter-specific failures.
108    fn search(&self, query: &[f32], top_k: usize) -> Result<Vec<SearchResult>, BackendError>;
109
110    /// Remove vectors by id.
111    ///
112    /// Unknown ids are silently ignored (Python parity).  An empty slice is a
113    /// no-op returning `Ok(())`.
114    ///
115    /// # Errors
116    ///
117    /// Returns `Err(BackendError::Adapter(_))` on adapter-specific failures
118    /// (e.g. a lost database connection).  In-process backends always return
119    /// `Ok(())`.
120    fn remove(&mut self, vector_ids: &[VectorId]) -> Result<(), BackendError>;
121
122    /// Number of vectors currently stored.
123    fn len(&self) -> usize;
124
125    /// Returns `true` if no vectors are stored.
126    fn is_empty(&self) -> bool {
127        self.len() == 0
128    }
129
130    /// The locked dimension, or `None` if no vectors have been ingested yet.
131    fn dim(&self) -> Option<usize>;
132}