tinyquant_core/backend/protocol.rs
1//! `SearchBackend` trait and `SearchResult` type.
2//!
3//! Every search backend — brute-force, pgvector, HNSW — implements
4//! [`SearchBackend`]. [`SearchResult`] is the return element from
5//! [`SearchBackend::search`]; it orders descending by score so that
6//! a max-heap or sort gives the nearest-first ranking callers expect.
7//!
8//! See `docs/design/behavior-layer/backend-protocol.md` for the full
9//! behavioural contract and Python-parity notes.
10
11use alloc::vec::Vec;
12
13use crate::errors::BackendError;
14use crate::types::VectorId;
15
16/// A single item returned by [`SearchBackend::search`].
17///
18/// `score` is the cosine similarity between the query and the stored
19/// vector. Values range from −1.0 to 1.0; higher is better. NaN
20/// inputs are mapped to `Equal` in the `Ord` impl so that a stable sort
21/// degrades gracefully rather than panicking.
22#[derive(Clone, Debug)]
23pub struct SearchResult {
24 /// The identifier of the matching stored vector.
25 pub vector_id: VectorId,
26 /// Cosine similarity score in [−1, 1]. Higher is better.
27 pub score: f32,
28}
29
30impl Ord for SearchResult {
31 /// Descending by score (highest score sorts first).
32 ///
33 /// NaN maps to `Equal` via `unwrap_or` so a stable sort degrades
34 /// gracefully rather than producing undefined behaviour. Vector id is
35 /// used as a tiebreaker (ascending) so that `Ord` agrees with `PartialEq`
36 /// — the contract requires `a == b` iff `a.cmp(b) == Equal`.
37 fn cmp(&self, other: &Self) -> core::cmp::Ordering {
38 // Descending by score; NaN collapses to Equal (stable sort preserves insertion order).
39 // Then tiebreak by vector_id ascending so Ord agrees with PartialEq.
40 other
41 .score
42 .partial_cmp(&self.score)
43 .unwrap_or(core::cmp::Ordering::Equal)
44 .then_with(|| self.vector_id.cmp(&other.vector_id))
45 }
46}
47
48impl PartialOrd for SearchResult {
49 fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
50 Some(self.cmp(other))
51 }
52}
53
54impl PartialEq for SearchResult {
55 fn eq(&self, other: &Self) -> bool {
56 self.cmp(other) == core::cmp::Ordering::Equal
57 }
58}
59
60impl Eq for SearchResult {}
61
62/// The public contract every search backend must satisfy.
63///
64/// Implementors hold a mutable collection of `(VectorId, Vec<f32>)` pairs
65/// and expose a uniform ingest / search / remove interface. Backends may
66/// be in-process (brute-force) or remote (pgvector); the trait hides the
67/// difference from callers.
68///
69/// # Thread safety
70///
71/// The trait requires `Send` so that backends can be moved between threads,
72/// e.g. wrapped in a `Mutex<dyn SearchBackend>`. Individual implementations
73/// may add `Sync` if they wish.
74///
75/// # Errors
76///
77/// All methods return `Result<_, BackendError>`. The caller decides whether
78/// to treat `BackendError::Empty` as a soft empty result or a hard failure.
79pub trait SearchBackend: Send {
80 /// Store the supplied `(VectorId, Vec<f32>)` pairs.
81 ///
82 /// - Dimension-locks on the first non-empty call; subsequent calls with a
83 /// different dimension return
84 /// `Err(BackendError::Adapter("dimension mismatch: expected {d}, got {got}"))`.
85 /// - Duplicate `VectorId`s overwrite the stored vector silently (Python
86 /// parity).
87 /// - An empty slice is a no-op returning `Ok(())`.
88 ///
89 /// # Errors
90 ///
91 /// Returns `Err(BackendError::Adapter(_))` if the vectors' dimension does
92 /// not match the dimension locked by a previous `ingest` call.
93 fn ingest(&mut self, vectors: &[(VectorId, Vec<f32>)]) -> Result<(), BackendError>;
94
95 /// Return the `top_k` nearest vectors to `query` by cosine similarity,
96 /// sorted descending.
97 ///
98 /// - `top_k == 0` → `Err(BackendError::InvalidTopK)`.
99 /// - Empty backend → `Ok(vec![])`.
100 /// - If the backend contains fewer than `top_k` vectors, returns all of
101 /// them sorted descending.
102 ///
103 /// # Errors
104 ///
105 /// Returns `Err(BackendError::InvalidTopK)` when `top_k == 0`.
106 /// Returns `Err(BackendError::Adapter(_))` when the query dimension does
107 /// not match the locked dimension, or on adapter-specific failures.
108 fn search(&self, query: &[f32], top_k: usize) -> Result<Vec<SearchResult>, BackendError>;
109
110 /// Remove vectors by id.
111 ///
112 /// Unknown ids are silently ignored (Python parity). An empty slice is a
113 /// no-op returning `Ok(())`.
114 ///
115 /// # Errors
116 ///
117 /// Returns `Err(BackendError::Adapter(_))` on adapter-specific failures
118 /// (e.g. a lost database connection). In-process backends always return
119 /// `Ok(())`.
120 fn remove(&mut self, vector_ids: &[VectorId]) -> Result<(), BackendError>;
121
122 /// Number of vectors currently stored.
123 fn len(&self) -> usize;
124
125 /// Returns `true` if no vectors are stored.
126 fn is_empty(&self) -> bool {
127 self.len() == 0
128 }
129
130 /// The locked dimension, or `None` if no vectors have been ingested yet.
131 fn dim(&self) -> Option<usize>;
132}