Skip to main content

tinyquant_bruteforce/
backend.rs

1//! `BruteForceBackend` — linear-scan `SearchBackend` implementation.
2//!
3//! Scans all stored vectors at query time. `O(n·d)` per query, no index
4//! structure. Suitable for corpora up to ~100 k vectors.
5
6use std::sync::Arc;
7
8use tinyquant_core::backend::{SearchBackend, SearchResult};
9use tinyquant_core::types::VectorId;
10
11use crate::errors::BackendError;
12use crate::similarity::cosine_similarity;
13use crate::store::OwnedStore;
14
15/// A brute-force search backend that computes cosine similarity against every
16/// stored vector on each query.
17///
18/// # Dimension locking
19///
20/// The first non-empty call to [`ingest`](BruteForceBackend::ingest) locks the
21/// dimension.  Subsequent calls with a different dimension return
22/// `Err(BackendError::Adapter("dimension mismatch: expected {d}, got {got}"))`.
23///
24/// # Python parity
25///
26/// - Overwriting an existing id is silent (matches Python `dict[id] = vec`).
27/// - `remove` with unknown ids is a no-op (matches Python `dict.pop(id, None)`).
28pub struct BruteForceBackend {
29    dim: Option<usize>,
30    store: OwnedStore,
31}
32
33impl BruteForceBackend {
34    /// Create a new, empty backend.
35    pub fn new() -> Self {
36        Self {
37            dim: None,
38            store: OwnedStore::new(),
39        }
40    }
41}
42
43impl Default for BruteForceBackend {
44    fn default() -> Self {
45        Self::new()
46    }
47}
48
49impl SearchBackend for BruteForceBackend {
50    fn ingest(&mut self, vectors: &[(VectorId, Vec<f32>)]) -> Result<(), BackendError> {
51        if vectors.is_empty() {
52            return Ok(());
53        }
54
55        for (id, vec) in vectors {
56            // Dimension lock: set on first vector, reject mismatches.
57            match self.dim {
58                None => {
59                    self.dim = Some(vec.len());
60                }
61                Some(expected) if vec.len() != expected => {
62                    return Err(BackendError::Adapter(Arc::from(format!(
63                        "dimension mismatch: expected {expected}, got {}",
64                        vec.len()
65                    ))));
66                }
67                Some(_) => {}
68            }
69            let arc_slice: Arc<[f32]> = Arc::from(vec.as_slice());
70            self.store.insert(Arc::clone(id), arc_slice);
71        }
72        Ok(())
73    }
74
75    fn search(&self, query: &[f32], top_k: usize) -> Result<Vec<SearchResult>, BackendError> {
76        if top_k == 0 {
77            return Err(BackendError::InvalidTopK);
78        }
79        if self.store.is_empty() {
80            return Ok(Vec::new());
81        }
82        // Dimension check: if locked, query must match.
83        if let Some(expected) = self.dim {
84            if query.len() != expected {
85                return Err(BackendError::Adapter(Arc::from(format!(
86                    "dimension mismatch: expected {expected}, got {}",
87                    query.len()
88                ))));
89            }
90        }
91
92        let mut results: Vec<SearchResult> = self
93            .store
94            .iter()
95            .map(|(id, vec)| {
96                let score = cosine_similarity(query, vec);
97                SearchResult {
98                    vector_id: Arc::clone(id),
99                    score,
100                }
101            })
102            .collect();
103
104        // Sort descending (SearchResult::Ord is descending by score).
105        results.sort();
106
107        results.truncate(top_k);
108        Ok(results)
109    }
110
111    fn remove(&mut self, vector_ids: &[VectorId]) -> Result<(), BackendError> {
112        for id in vector_ids {
113            self.store.remove(id);
114        }
115        Ok(())
116    }
117
118    fn len(&self) -> usize {
119        self.store.len()
120    }
121
122    fn dim(&self) -> Option<usize> {
123        self.dim
124    }
125}